#' title: "Agenda Seeding: Data Analysis"
#' date: "`r Sys.Date()`"
#' output: pdf_document



## ---- spin_code, eval = FALSE, include = FALSE ----
# spin code to output Rmd / Rnw
# set output_format to "html_document" for html
# rmarkdown::render(input = here::here("code/APSR_protests3_1_rep.R"), output_format = "pdf_document", clean = FALSE)


## ---- setup_initial, echo = FALSE, include = FALSE ----
library(knitr)
library(here) 

set.seed(12345678)

opts_chunk$set( fig.lp = "fig:", echo = FALSE, message = FALSE, warning = FALSE, error = FALSE)


## ---- set_global_parameters, include = FALSE ----
## set some global R code parameters

max_black_threshold <- 10
white_percent       <- (100 - max_black_threshold)

arr_thresh          <- 10
dis_thresh          <- 100

# for stargazer
star.cut.vector     <- c(0.05, NA, NA)

# protest 'treatment' parameters 
distance  <- 100
time      <- 365 * 2
intensity <- 10



## ---- load_packages, include = FALSE ----
## Set CRAN repository
local({
    r <- getOption("repos")
    r["CRAN"] <- "http://lib.stat.cmu.edu/R/CRAN/"
    options(repos = r)
})

source(here("code/source_libraries.R"))



## ---- load_ggplot_themes ----
## Custom ggplot theme based on cowplot

require(cowplot)

theme_apsr <- theme_cowplot() +
    theme(
        panel.grid.major = element_line(size = .5, color = "grey90"),
        axis.text        = element_text(size = 12),
        strip.text       = element_text(size=12, lineheight=0.5)
    )
                        
theme_apsr_grid <- theme_cowplot() +
    theme(
        panel.grid.major = element_line(size = .5, color = "grey90"),
        axis.text        = element_text(size = 12),
        strip.text       = element_text(size = 12, lineheight=0.5),
        panel.grid.minor = element_line(size = .25, color = "grey90")
  ) 



theme_apsr_coef <- theme_cowplot() +
  theme(
    panel.grid.major = element_line(size = .5, color = "grey90"),
    axis.text = element_text(size = 12),
    strip.text = element_text(size=12, lineheight=0.5),
    panel.grid.minor = element_line(size = .25, color = "grey90"),
    
    plot.title = element_text(size = 18),
      text = element_text(size = 18),
      axis.text.x = element_text(
        angle = 0,
        hjust = 0,
        vjust = 0
      )
    )


# shadowtext::geom_shadowtext background ratio (how much to expand white outline)
bg_ratio <- 0.25


## ---- load_minor_functions, include = FALSE ----
short <- function(x) {
    format(
        round(x, 2),
        digits = 1,
        scientific = F,
        big.mark = ",",
        nsmall = 1
    )
}

fiver <- function(x) {
    str_pad(
        string = x,
        width = 5,
        side = "left",
        pad = "0"
    )
}

# robust standard errors through broom::tidy
rse <- function(x) {
    suppressWarnings(robust.se(x)) %>% 
        broom::tidy() %>% 
        pull(std.error) %>% 
        unlist()
}

# for cleaning covariate labels when printing stargazer tables to html
remove_latex <- function(x){
    x %>% 
        str_replace_all("\\\\", "") %>% 
        str_replace_all("\\^", "") %>%
        str_replace_all("_", " ") %>% 
        str_replace_all("\\$", "") %>% 
        str_replace_all("`", "'") 
}

clean_covar_labels <- function(cl) {
    cl <- case_when(
            knitr::is_html_output() ~ remove_latex(cl),
            TRUE                    ~ cl
    )
}


## ---- load_processed_protest_data_updated, include = FALSE ----

# load combined electoral, demographic, carter, rain, dca data
load(here("data/voting_census_carter_rain_dca.Rdata"), verbose = TRUE)




## ---- word_functions, include = FALSE ----

# adds comma for printing numbers, from scales package by Hadley Wickham
add_comma <- function(x, ...) {
  format(x, ..., big.mark = ",", scientific = FALSE, trim = TRUE)
}

number_to_word <- function(x) {
  ifelse(x > 10, x, c("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten")[x])
}


## ---- load_custom_fuctions ----
se.func <- function(xx) {
  cov1 <- vcov(xx)
  se <- sqrt(diag(cov1))
  return(se)
}

kable_format <- function() case_when(
      knitr::is_latex_output() ~ "latex",
      knitr::is_html_output()  ~ "html",
      TRUE                     ~ "markdown"
  )

star_format <- case_when(
      knitr::is_latex_output() ~ "latex",
      knitr::is_html_output()  ~ "html",
      TRUE                     ~ "text"
  )



p_val <- function(xx) {
  case_when(
    xx < 0.0001               ~ "$p$ < 0.0001",
    xx >= 0.0001 & xx < 0.001 ~ "$p$ < 0.001",
    xx >= 0.001  & xx < 0.01  ~ "$p$ < 0.01",
    xx >= 0.01   & xx < 0.05  ~ "$p$ < 0.05",
    xx >= 0.05                ~ "$p$ > 0.05"
    #TRUE ~ as.character(x)
  ) 
}


robust.se.func <- function(xx) {
  cov1 <- vcovHC(xx, cluster = "group")
  robust.se <- sqrt(diag(cov1))
  return(robust.se)
}



## ---- load_state_maps ----
# load early to prevent ggplot / tidyverse conflicts in later choropleths
gg_states <- ggplot2::map_data("state")



## ---- mostimportantproblem1950, fig.width=9, fig.height=5, fig.pos="H", fig.cap="Public opinion on the `Most Important Problem,' 1950 to 1979." ----
# , fig.pos="h!"
# cache.var

# save a regular plotting function
theme_set(theme_cowplot())
regular_plot <- knit_hooks$get("plot")

# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
  paste("\n\\end{kframe}\n\\begin{figure}\n",
    "\\includegraphics[width=\\maxwidth]{",
    opts_knit$get("base.url"), paste(x, collapse = "."),
    "}\n",
    "\\begin{minipage}{6in}\\small\\begin{flushleft}\\textit{Note}: Scatter plot uses loess-smoothed trend lines. Each letter represents the percentage of people answering that a particular issue is the most important problem in America in a single poll. poll. Data sources: \\citet{niemi1989trends, loo2004polls}. \\end{flushleft}\\end{minipage}\\vspace*{2mm}",
    "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
    "\n\\end{figure}\n\\begin{kframe}\n",
    sep = ""
  )
})



polls <- read.csv(here("data/polls1950-1979-date.csv"))


polls$Social.Control <- imputeTS::na_interpolation(polls$Social.Control)
polls$Civil.Rights   <- imputeTS::na_interpolation(polls$Civil.Rights)


polls$Date <- as.Date(polls$Date, format = "%m/%d/%Y")
polls[, c("Foreign.Affairs", "Economic", "Social.Control", "Civil.Rights")] <- polls[, c("Foreign.Affairs", "Economic", "Social.Control", "Civil.Rights")] * .01

sp  <- 0.15  # span
sz  <- 2.5   # size
lwt <- 1.0   # line weight
alp <- 0.4   # alpha


# Viridis
colors_four <- viridis::viridis(begin = 0, end = .9, n = 4, alpha = .9, direction = -1)


p <-
  ggplot(data = polls, aes(x = Date, y = Foreign.Affairs)) +
  xlab("") +
  ylab("Percent Saying 'Most Important Problem'") +
  # ggtitle("Public Opinion on 'Most Important Problem,' 1950-1980") +
  geom_point(aes(Date, Economic), size = sz, shape = "e", alpha = alp, color = colors_four[1]) + # "dark blue"
  geom_smooth(
    aes(Date, Economic),
    se       = FALSE,
    color    = colors_four[1], # "dark blue",
    span     = sp,
    size     = lwt,
    linetype = 1 # "21"
  ) +
  scale_y_continuous(label = scales::percent_format(accuracy = 1)) +
  scale_x_date(date_labels = "%Y", 
               date_breaks = "2 years"#, 
               #limits = c(as.Date("1950-01-01"), as.Date("1980-01-01")),
               #expand = c(0, 0)
  )

p <-
  p + geom_point(aes(Date, Social.Control),
    size = sz, shape = "s",
    alpha = alp, color = colors_four[2]
  ) + # "dark red"
  geom_smooth(
    aes(Date, Social.Control),
    se       = FALSE,
    color    = colors_four[2], # "dark red",
   span      = sp,
    size     = lwt,
    linetype = 1 #"91" # "41"
  )

p <-
  p + geom_point(aes(Date, Foreign.Affairs),
    size = sz, shape = "f",
    alpha = alp, 
    color = colors_four[3], #"gray50",  
  ) + # colors_four[3]
  geom_smooth(
    aes(Date, Foreign.Affairs),
    se       = FALSE,
    color    = colors_four[3], # "gray50"
    span     = sp,
    size     = lwt,
    linetype = 1 # "6131"
  )

p <-
  p + geom_point(aes(Date, Civil.Rights),
    size = sz, shape = "c",
    alpha = alp, color = colors_four[4]
  ) + # "dark green"
  geom_smooth(
    aes(Date, Civil.Rights),
    se       = FALSE,
    color    = colors_four[4], # "dark green",
    span     = sp,
    size     = lwt,
    linetype = 1 #"51" # "9111"
  )
tx <- 4




p <- p + geom_shadowtext(
    aes(label = label),
    data.frame(
        Foreign.Affairs = .525,
        Date  = as.Date("1950-03-01"),
        label = "Foreign Affairs (f)"
    ),
    hjust    = 0,
    size     = tx,
    col      = 'black',
    bg.color = 'white',
    bg.r     = bg_ratio
) +
    geom_shadowtext(
        aes(label = label),
        data.frame(
            Foreign.Affairs = .315,
            Date = as.Date("1950-03-01"),
            label = "Economy (e)"
        ),
        hjust    = 0,
        size     = tx,
        col      = 'black',
        bg.color = 'white',
        bg.r     = bg_ratio
    ) +
    geom_shadowtext(
        aes(label = label),
        data.frame(
            Foreign.Affairs = .125,
            Date = as.Date("1950-03-01"),
            label = "Social Control (s) "
        ),
        hjust    = 0,
        size     = tx,
        col      = 'black',
        bg.color = 'white',
        bg.r     = bg_ratio
    ) +
    geom_shadowtext(
        aes(label = label),
        data.frame(
            Foreign.Affairs = .025,
            Date = as.Date("1950-03-01"),
            label = "Civil Rights (c)"
        ),
        hjust    = 0,
        size     = tx,
        col      = 'black',
        bg.color = 'white',
        bg.r     = bg_ratio
    )

p <-
    p + geom_vline(
        xintercept = as.numeric(c(
            as.Date("1960-11-08"),
            as.Date("1964-11-03"),
            as.Date("1968-11-05"),
            as.Date("1972-11-07")
        )),
        linetype = "13",
        alpha    = .7,
        size     = .75
    ) +
    geom_shadowtext(
        aes(label = label),
        data.frame(
            Foreign.Affairs = .72,
            Date = as.Date("1954-10-01"),
            label = "Presidential Elections\n(1960-1972)"
        ),
        hjust    = 0,
        size     = tx,
        col      = 'black',
        bg.color = 'white',
        bg.r     = bg_ratio
    )


p + theme_apsr_grid





## ---- plm_calc_prep ----

# deprecated?
county_protest_prep <- function(counties, protests) {

  counties$fips    <- fiver(counties$fips)
  protests$FIPSKEY <- fiver(protests$FIPSKEY)
  # subset columns
  p.cols <- grep("FIPSKEY|^Y60PDEM|^Y64PDEM|^Y68PDEM|^Y72PDEM|^R.EFFECT64|^R.EFFECT68|^R.EFFECT72", names(protests))

  p2 <- protests[, p.cols]

  # rename and clean column names
  names(p2) <- c("FIPSKEY", "PDEM60", "PDEM64", "PDEM68", "PDEM72", "P.EFFECT64", "P.EFFECT68", "P.EFFECT72")

  names(p2) <- tolower(names(p2))
  names(p2) <- sub("\\.", "\\_", names(p2))

  # merge county data and protest data
  c3 <- merge(counties, p2, by.x = "fips", by.y = "fipskey")

  c3$plag64 <- c3$pdem60
  c3$plag68 <- c3$pdem64
  c3$plag72 <- c3$pdem68

  # create blank year vals every two years for 
  # time series imputation at two year intervals 
  c3$plag62 <- NA
  c3$plag66 <- NA
  c3$plag74 <- NA
  c3$plag76 <- NA
  c3$plag78 <- NA
  
  
  # pivot c3 to long format with fips, variable, value
  c4 <- melt(c3, id = c("fips"))

  # split variable-year
  temp <- strsplit(as.character(c4$variable), "(?<=[[:alpha:]])(?=[[:digit:]])", perl = TRUE)

  # rebuild data with fips, full var, value, split var, split year
  c5 <- transform(c4, var = sapply(temp, "[", 1), year = sapply(temp, "[", 2))

  # recode year as four digits
  c5$year <- as.numeric(paste("19", c5$year, sep = ""))

  # drop var column
  c5 <- c5[, -grep("variable", names(c5))]

  # create wide df with fips year as key
  c6 <- dcast(c5, fips + year ~ var, value.var = "value")

  dont.interpolate <- grep("fips|year|p_effect|pdem|plag|stfips", names(c6))
  l <- length(names(c6))
  do.interpolate <- base::setdiff(1:l, dont.interpolate)
  do.interpolate.names <- names(c6)[do.interpolate]

  c7 <- c6 %>% 
      group_by(fips) %>% 
      mutate_at(
          vars(do.interpolate.names), 
          function(x) 
              imputeTS::na_interpolation(x, option = "linear")
          ) %>% 
      ungroup()
  
  c7 <- c7 %>% filter(year == 1964 | year == 1968 | year == 1972)
  
  pdata.out <- pdata.frame(c7, index = c("fips", "year")) # , , row.names = TRUE)
#used to be st2
  
  pdata.out$fips_st     <- as.factor(str_sub(pdata.out$fips, 1, 2) )
  pdata.out$fips_county <- as.factor(str_sub(pdata.out$fips, 3, 5) )
  
  pdata.out$south2 <- pdata.out$fips_st %in% c("01", "05", "12", "13", "22", "28", "37", "45", "48", "51")

  pdata.out$per_black_sq      <- (pdata.out$per_black)^2 
  pdata.out$locgov_exp_pc_log <- log(pdata.out$locgov_exp_pc + 1) 
  pdata.out$tot_pop_log       <- log(pdata.out$tot_pop + 1)
  pdata.out$med_income        <- (pdata.out$med_income / 1000)
  
  return(pdata.out)
}






## ---- plm_calc_load_data, include = FALSE ----

load(here("data/county3_demo.Rdata"), verbose = TRUE)



## ---- plm_pdata_calc_updated, include = FALSE ----

# updated pdata
pdata_nv  <- vc2 %>% 
    rename(p_effect = dca_nv_bin) %>% 
    select(-matches('^car_|^dca_|^pwall|^prep|^pto|^pttl|^poth'))

pdata_v   <- vc2 %>% 
    rename(p_effect = dca_v_bin) %>% 
    select(-matches('^car_|^dca_|^pwall|^prep|^pto|^pttl|^poth'))

pdata_v2  <- vc2 %>% 
    rename(p_effect = car_bin) %>% 
    select(-matches('^car_|^dca_|^pwall|^prep|^pto|^pttl|^poth'))

pdata_mlk <- vc2 %>% 
    rename(p_effect = car_mlk_bin) %>% 
    filter(year == 1968) %>% 
    # keep pwall, pttl for 1968 election simulations
    select(-matches('^car_|^dca_|^pto|^poth')) 





## ---- plm_models, include = FALSE ----

save(pdata_nv, pdata_v, pdata_v2, file = here("data/pdata.Rdata"))

form <- pdem ~ p_effect + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign

form_lag <- pdem ~ p_effect + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag 

# Mahalanobis match without south due to computation issues
form_match_maha <- p_effect ~ locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag


# plm with specifications
countyplm <- function(plmdata, form) {
    return(plm(form,
               model = "within",
               index = c("fips", "year"),
               data = plmdata)
           )
}




plm.nv <- countyplm( pdata_nv, form)

plm.v  <- countyplm( pdata_v , form)

plm.v2 <- countyplm( pdata_v2, form )

plm.nv.white <- countyplm( pdata_nv[pdata_nv$per_black <= max_black_threshold, ], form)

plm.v.white  <- countyplm( pdata_v [pdata_v$per_black  <= max_black_threshold, ], form)

plm.v2.white <- countyplm( pdata_v2[pdata_v2$per_black <= max_black_threshold, ] , form )


plm.nv.lag <- countyplm( pdata_nv, form_lag)

plm.v.lag  <- countyplm( pdata_v,  form_lag)

plm.v2.lag <- countyplm( pdata_v2, form_lag )


plm.nv.lag.white <- countyplm( pdata_nv[pdata_nv$per_black <= max_black_threshold, ] , form_lag)

plm.v.lag.white <- countyplm( pdata_v[pdata_v$per_black <= max_black_threshold, ] , form_lag)

plm.v2.lag.white <- countyplm( pdata_v2[pdata_v2$per_black <= max_black_threshold, ] , form_lag )


    se1 <- robust.se.func(plm.nv)
    se2 <- robust.se.func(plm.v)
    se3 <- robust.se.func(plm.v2)

    se1.white <- robust.se.func(plm.nv.white)
    se2.white <- robust.se.func(plm.v.white)
    se3.white <- robust.se.func(plm.v2.white)

    se1.lag <- robust.se.func(plm.nv.lag)
    se2.lag <- robust.se.func(plm.v.lag)
    se3.lag <- robust.se.func(plm.v2.lag)

    se1.lag.white <- robust.se.func(plm.nv.lag.white)
    se2.lag.white <- robust.se.func(plm.v.lag.white)
    se3.lag.white <- robust.se.func(plm.v2.lag.white)


mod_stargazer <- function(plm1, plm2, plm3, se1, se2, se3){
    
    covar_labels <- c(
        "Protest `Treatment'",
        "log(PC Local Gov Exp)",
        "\\% HS+ Educ",
        "\\% Black",
        "(\\% Black)$^2$",
        "Median Age",
        "Median Income (000s)",
        "\\% Unemployment",
        "\\% Urban",
        "log(Population)",
        "\\% Owner Occ Housing",
        "\\% Pop Growth",
        "\\% Pop Foreign",
        "Lagged Dem Vote Share"
    )
    
    # requires remove_latex function loaded
    covar_labels <- clean_covar_labels(covar_labels)


# star.cutoffs = c(0.05, 0.01, 0.001)
    stargazer(
        plm1,
        plm2,
        plm3,
        align            = TRUE,
        se               = list(se1, se2, se3),
        type             = star_format,
        float            = FALSE,
        star.cutoffs     = star.cut.vector,
        notes.append     = FALSE,
        notes            = "*$p<0.05$",
        font.size        = 'scriptsize',
        dep.var.caption  = "DV: County-level Democratic Presidential vote-share",
        dep.var.labels   = c(""),
        model.numbers    = TRUE,
        column.labels    = c(
          "Nonviolent Protests (DCA data)",
          "Violent Protests (DCA data)",
          "Violent Protests (Carter data)"
          ),
        float.env        = "table",
        covariate.labels = covar_labels
        )
        #,  )
        

}


# identify index of p_effect 
p_effect_index <- which(names(plm.nv$coefficients) == "p_effect")

coefspan_nv    <- c(plm.nv$coefficients[p_effect_index])
sespan_nv      <- se1.lag[p_effect_index]

cipanlo_nv     <- (coefspan_nv - 1.96 * sespan_nv)
cipanhi_nv     <- (coefspan_nv + 1.96 * sespan_nv)


coefspan_v     <- c(plm.v.lag$coefficients[p_effect_index])
sespan_v       <- se2.lag[p_effect_index]

cipanlo_v      <- (coefspan_v - 1.96 * sespan_v)
cipanhi_v      <- (coefspan_v + 1.96 * sespan_v)

coefspan_v2    <- c(plm.v2.lag$coefficients[p_effect_index])
sespan_v2      <- se3.lag[p_effect_index]

cipanlo_v      <- (coefspan_v - 1.96 * sespan_v)
cipanhi_v      <- (coefspan_v + 1.96 * sespan_v)


## ---- plm_calc_save, include = FALSE ----
save(coefspan_v, sespan_v, coefspan_v2, sespan_v2, 
     file = here("data/plm.coefs.Rdata") )




## ---- summary_stats  ----

summary_statify <- function(data,
                            year         = 1964,
                            #min_tot_pop = 0,
                            #min_black   = 0,
                            #max_black   = 100,
                            row_names    = NA,
                            #type        = "text",
                            verbose      = FALSE) {

  if (verbose) print(dim(data))
    
  data <- data[data$year == year, ] 
  if (verbose) print(dim(data))
  

  data$south2 <- as.numeric(data$south2)
  
  row_names <- c("log(PC Gov Exp)", "Median Age", "Median Inc (000s)", "% Black", "% HS+ Educ", "% Own Occ Hous", "% Pop Foreign", "% Pop Growth", "% Unemployment", "% Urban", "Lag Dem Share", "log(Population)", "% South", "N")
  
  ct <- data %>% 
      filter(p_effect == 0) %>% 
      select(locgov_exp_pc_log, med_age, med_income, per_black, per_hsplus, 
               per_owner_housing, per_pop_foreign, per_pop_growth, 
             per_unemp, per_urban, plag, tot_pop_log, south2)

  if (verbose) print(dim(ct))

  tr <- data %>% 
      filter(p_effect == 1) %>% 
      select(locgov_exp_pc_log, med_age, med_income, per_black, per_hsplus, 
             per_owner_housing, per_pop_foreign, per_pop_growth, 
             per_unemp, per_urban, plag, tot_pop_log, south2)
  
  if (verbose) print(dim(tr))

  
  ctrls    <- colMeans(ct, na.rm = TRUE)
  ctrls_sd <- apply(ct, 2, sd, na.rm = TRUE)
  ctrls_n  <- nrow(ct)

  treat    <- colMeans(tr, na.rm = TRUE)
  treat_sd <- apply(tr, 2, sd, na.rm = TRUE)
  treat_n  <- nrow(tr)

  # mean percent south x 100
  south_row <- which(names(ctrls) == "south2")
  ctrls[south_row] <- ctrls[south_row] * 100
  treat[south_row] <- treat[south_row] * 100

  sum_df <- data.frame(
    controls    = c(ctrls,    ctrls_n),
    controls_sd = c(ctrls_sd, NA),
    treated     = c(treat,    treat_n),
    treated_sd  = c(treat_sd, NA)
  )
  
  # round to 3 digits
  sum_df <- apply(sum_df, 2, round, 3)

  rownames(sum_df) <- row_names
  colnames(sum_df) <-
    c("Controls Mean", "Controls SD", "`Treated' Mean", "`Treated' SD")
  

  # if (type == "text")
  # re-order rows
  
  per_rows         <- which(str_detect(names(ctrls), "^per_"))
  gov_age_inc_rows <- which(str_detect(names(ctrls), "^locgov_|^med_"))
  dem_tot_pop_rows <- which(str_detect(names(ctrls), "^plag|^tot_"))
  N_row            <- length(ctrls) + 1
  
  sum_table <- sum_df[c(per_rows,         # move percents to top together
                        south_row,        # percent south 
                        gov_age_inc_rows, # gov exp, med age, med income
                        dem_tot_pop_rows, # dem vote share, log pop
                        N_row             # N
                        ), ]   
  return(sum_table)
}



## ---- plm_matching  ----

fips_codes <- read.csv(here("data/icpsr_states_fips_census.csv")) %>% clean_names()

fips_codes$statefip <- str_pad(fips_codes$statefip, width = 2, side = "left", pad = "0")

## creating 1964 subset for NV events and 1968 subset for V events
nv <- pdata_nv %>% 
    filter(year == 1964) %>% 
    select(fips, fips_st, p_effect, per_black, tot_pop_log, per_urban, per_pop_foreign, south2) 

nv.white <- nv %>% filter(per_black <= max_black_threshold)

v <- pdata_v %>%
    filter(year == 1968) %>% 
    select(fips, fips_st, p_effect, per_black, tot_pop_log, per_urban, per_pop_foreign, south2)

v.white <- v %>% filter(per_black <= max_black_threshold)

v2 <- pdata_v2 %>% 
    filter(year == 1968) %>% 
    select(fips, fips_st, p_effect, per_black, tot_pop_log, per_urban, per_pop_foreign, south2)


v2.white <- v2 %>% filter(per_black <= max_black_threshold)


caliper <- 0.1


## matching
nv.match <- matchit(
  formula = p_effect ~ per_black + tot_pop_log + per_urban + per_pop_foreign + south2,
  method  = "nearest",
  data    = nv, 
  caliper = caliper
)

nv.data <- match.data(nv.match)

fips_matched   <- nv.data$fips 

pdata_nv_match <- pdata_nv %>% 
    filter(fips %in% fips_matched)



nv.match.white <- matchit(
  formula = p_effect ~ per_black + tot_pop_log + per_urban + per_pop_foreign + south2,
  method  = "nearest",
  data    = nv.white,  
  caliper = caliper
  )

nv.data.white <- match.data(nv.match.white)

fips_matched <- nv.data.white$fips 

pdata_nv_match_white <- pdata_nv %>% 
    filter(fips %in% fips_matched & per_black <= max_black_threshold)


v.match <- matchit(
  formula = p_effect ~ per_black + tot_pop_log + per_urban + per_pop_foreign + south2,
  method  = "nearest",
  data    = v,
  caliper = caliper
)

v.data <- match.data(v.match)

fips_matched <- v.data$fips

pdata_v_match <- pdata_v %>% 
    filter(fips %in% fips_matched)


v.match.white <- matchit(
  formula = p_effect ~ per_black + tot_pop_log + per_urban + per_pop_foreign + south2,
  method  = "nearest",
  data    = v.white,
  caliper = caliper
)
v.data.white <- match.data(v.match.white)

fips_matched <- v.data.white$fips


pdata_v_match_white <- pdata_v %>% 
    filter(fips %in% fips_matched & per_black <= max_black_threshold)


v2.match <- matchit(
  formula = p_effect ~ per_black + tot_pop_log + per_urban + per_pop_foreign + south2,
  method  = "nearest",
  data    = v2, 
  caliper = caliper
)

v2.data <- match.data(v2.match)

fips_matched <- v2.data$fips

pdata_v2_match <- pdata_v2 %>% 
    filter(fips %in% fips_matched)


v2.match.white <- matchit(
  formula = p_effect ~ per_black + tot_pop_log + per_urban + per_pop_foreign + south2,
  method  = "nearest",
  data    = v2.white, 
  caliper = caliper
)

v2.data.white <- match.data(v2.match.white)

fips_matched <- v2.data.white$fips 

pdata_v2_match_white <- pdata_v2 %>% 
    filter(fips %in% fips_matched & per_black <= max_black_threshold)



plm.nv.lag.match       <- countyplm(pdata_nv_match, form_lag)
se1.lag.match          <- robust.se.func(plm.nv.lag.match)

plm.nv.lag.match.white <- countyplm(pdata_nv_match_white, form_lag)
se1.lag.match.white    <- robust.se.func(plm.nv.lag.match.white)

plm.v.lag.match        <- countyplm(pdata_v_match, form_lag)
se2.lag.match          <- robust.se.func(plm.v.lag.match)

plm.v.lag.match.white  <- countyplm(pdata_v_match_white, form_lag)
se2.lag.match.white    <- robust.se.func(plm.v.lag.match.white)

plm.v2.lag.match       <- countyplm(pdata_v2_match, form_lag)
se3.lag.match          <- robust.se.func(plm.v2.lag.match)

plm.v2.lag.match.white <- countyplm(pdata_v2_match_white, form_lag)
se3.lag.match.white    <- robust.se.func(plm.v2.lag.match.white)



## ---- summary.stats.plm, echo=FALSE  ----

row_names <- c("log(PC Gov Exp)", "Median Age", "Median Inc (000s)", "% Black", "% HS+ Educ", "% Own Occ Hous", "% Pop Foreign", "% Pop Growth", "% Unemployment", "% Urban", "Lag Dem Share", "log(Population)", "% South", "N")

summary_nv64 <- summary_statify(
        data        = pdata_nv_match,
        year        = 1964,
        row_names   = row_names)

summary_v68 <- summary_statify(data = pdata_v_match, year = 1968, row_names = row_names)

summary_v268 <- summary_statify(data = pdata_v2_match, year = 1968, row_names = row_names)

save(summary_nv64, summary_v68, summary_v268, file = here("data/summary_stats.Rdata"))




summary_nv64_unmatched <- summary_statify(
        data        = pdata_nv,
        year        = 1964,
        row_names   = row_names)

summary_v68_unmatched <- summary_statify(data = pdata_v, year = 1968, row_names = row_names)

summary_v268_unmatched <- summary_statify(data = pdata_v2, year = 1968, row_names = row_names)

save(summary_nv64_unmatched, summary_v68_unmatched, summary_v268_unmatched,
     file = here("data/summary_stats_unmatched.Rdata"))






## ---- grob_functions, echo=FALSE  ----

annotation_text <- function(text, xoffset, yoffset, textoffset) {
    temp <- annotation_custom(
        text,
        xmin = xoffset,
        xmax = xoffset,
        ymin = textoffset + yoffset,
        ymax = textoffset + yoffset
    )
    return(temp)
}

annotation_bracket <- function(xoffset, yoffset, groupsize, singlestem=NA, doublestem, verbose=FALSE) {
  linejump <- (groupsize / 2) - 0.5
  if (verbose) print(str(data.frame(xoffset, yoffset, groupsize, linejump, doublestem, singlestem)))

  if (is.na(singlestem)) singlestem <- doublestem

  temp1 <- annotation_custom(
    grob = linesGrob(),
    xmin = (xoffset - linejump),
    xmax = (xoffset + linejump),
    ymin = yoffset,
    ymax = yoffset
  )
  temp2 <- annotation_custom(
    grob = linesGrob(),
    xmin = (xoffset - linejump),
    xmax = (xoffset - linejump),
    ymin = (yoffset + doublestem),
    ymax = yoffset
  )
  temp3 <- annotation_custom(
    grob = linesGrob(),
    xmin = (xoffset + linejump),
    xmax = (xoffset + linejump),
    ymin = (yoffset + doublestem),
    ymax = yoffset
  )
  temp4 <- annotation_custom(
    grob = linesGrob(),
    xmin = xoffset,
    xmax = xoffset,
    ymin = (yoffset - singlestem),
    ymax = yoffset
  )
  tt <- list(temp1, temp2, temp3, temp4)
  return(tt)
}



## ---- load_spatial_panel_models, echo=FALSE ----

# run spatial models separately due to processing time
load(here("data/spatial_panel_multi2.Rdata") )


## ---- plm_plots_test, echo=FALSE,  fig.width=12, fig.height=6, fig.pos="h!", fig.cap="Panel Models of Effect of Protest on Vote Share, 1964-1972"  ----


# restore regular plotting function
# knit_hooks$set(plot = regular_plot)

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
  paste("\n\\end{kframe}\n\\begin{figure}\n",
    "\\includegraphics[width=\\maxwidth]{",
    opts_knit$get("base.url"), paste(x, collapse = "."),
    "}\n",
    "\\begin{minipage}{6in}\\small\\begin{flushleft}\\vspace*{2mm} \\textit{Note}: Each point represents a coefficient (along with the 90 and 95\\% confidence intervals) for the estimated effect of protests on change in county-level Democratic vote share in the presidential elections of 1964, 1968 and 1972 with county fixed effects. Models using DCA data measure protest activity using participants $>=10$ and, with Carter data, arrests $>=10$. Other specifications can be seen in the Appendix.\\end{flushleft}\\end{minipage}\\vspace*{2mm}",
    "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
    "\n\\end{figure}\n\\begin{kframe}\n",
    sep = ""
  )
})
# Appendix, Section \\ref{sec:panel_sweep}

plm_df <-
  data.frame(
    coef  = rep(NA, 15),
    se    = NA,
    n     = NA,
    label = NA,
    group = NA
  )

plm_df$label <-
    rep(c(
        "All counties",
        paste0(white_percent, "% white"),
        "Matched",
        "White matched",
        "Spatial panel"
    ),
    times = 3)
plm_df$group <-
  rep(c(
    "Nonviolent (DCA data)",
    "Violent (DCA data)",
    "Violent (Carter data)"
  ),
  each = 5
  )

# plm_df$group <- factor(plm_df$group, levels = plm_df$group)

plm_df$label2 <- rep(
  c(
    "Nonviolent, All counties\n(DCA data)",
    paste0("Nonviolent, ", white_percent ,"% White counties\n(DCA data)"),
    "Nonviolent, All matched\n(DCA data)",
    "Nonviolent, White matched\n(DCA data)",
    "Nonviolent, Spatial panel\n(DCA data)",
    "Violent, All counties\n(DCA data)",
    paste0("Violent, ", white_percent ,"% White counties\n(DCA data)"),
    "Violent, All matched\n(DCA data)",
    "Violent, White matched\n(DCA data)",
    "Violent, Spatial panel\n(DCA data)",
    "Violent, All counties\n(Carter data)",
    paste0("Violent, ", white_percent ,"% White counties\n(Carter data)"),
    "Violent, All matched\n(Carter data)",
    "Violent, White matched\n(Carter data)",
    "Violent, Spatial panel\n(Carter data)"
  )
)

plm_df$label <- factor(plm_df$label, levels = unique(plm_df$label))


plm_df[1, 1:3] <-
  c(
    plm.nv.lag$coefficients[p_effect_index],
    se1.lag[p_effect_index],
    nrow(plm.nv.lag$model)
  )
plm_df[2, 1:3] <-
  c(
    plm.nv.lag.white$coefficients[p_effect_index],
    se1.lag.white[p_effect_index],
    nrow(plm.nv.lag.white$model)
  )
plm_df[3, 1:3] <-
  c(
    plm.nv.lag.match$coefficients[p_effect_index],
    se1.lag.match[p_effect_index],
    nrow(plm.nv.lag.match$model)
  )
plm_df[4, 1:3] <-
  c(
    plm.nv.lag.match.white$coefficients[p_effect_index],
    se1.lag.match.white[p_effect_index],
    nrow(plm.nv.lag.match.white$model)
  )
plm_df[5, 1:3] <-
  c(
    sp_coef[1],
    sp_se[1],
    length(sp_nv$residuals)
  )

plm_df[6, 1:3] <-
  c(
    plm.v.lag$coefficients[p_effect_index],
    se2.lag[p_effect_index],
    nrow(plm.v.lag$model)
  )
plm_df[7, 1:3] <-
  c(
    plm.v.lag.white$coefficients[p_effect_index],
    se2.lag.white[p_effect_index],
    nrow(plm.v.lag.white$model)
  )
plm_df[8, 1:3] <-
  c(
    plm.v.lag.match$coefficients[p_effect_index],
    se2.lag.match[p_effect_index],
    nrow(plm.v.lag.match$model)
  )
plm_df[9, 1:3] <-
  c(
    plm.v.lag.match.white$coefficients[p_effect_index],
    se2.lag.match.white[p_effect_index],
    nrow(plm.v.lag.match.white$model)
  )
plm_df[10, 1:3] <-
  c(
    sp_coef[2],
    sp_se[2],
    length(sp_v$residuals)
  )

plm_df[11, 1:3] <-
  c(plm.v2.lag$coefficients[p_effect_index], se3.lag[p_effect_index], nrow(plm.v2.lag$model))
plm_df[12, 1:3] <-
  c(
    plm.v2.lag.white$coefficients[p_effect_index],
    se3.lag.white[p_effect_index],
    nrow(plm.v2.lag.white$model)
  )
plm_df[13, 1:3] <-
  c(
    plm.v2.lag.match$coefficients[p_effect_index],
    se3.lag.match[p_effect_index],
    nrow(plm.v2.lag.match$model)
  )
plm_df[14, 1:3] <-
  c(
    plm.v2.lag.match.white$coefficients[p_effect_index],
    se3.lag.match.white[p_effect_index],
    nrow(plm.v2.lag.match.white$model)
  )
plm_df[15, 1:3] <-
  c(
    sp_coef[3],
    sp_se[3],
    length(sp_v2$residuals)
  )


plm_df$label3 <-
  paste0(rep(c(
    "Nonviolent, ", "Violent (O), ", "Violent (C), "
  ), each = 5), plm_df$label, "\n(N=", comma(plm_df$n), ")")
plm_df$label3 <- factor(plm_df$label3, levels = rev(unique(plm_df$label3)))

plm_df$label4 <- paste0(1:15, ". ", plm_df$label, " (N=", comma(plm_df$n), ")")
plm_df$label4 <- factor(plm_df$label4, levels = rev(unique(plm_df$label4)))


interval1 <- -qnorm((1 - 0.90) / 2) # 90 percent multiplier
interval2 <- -qnorm((1 - 0.95) / 2) # 95 percent multiplier


text_nv <-
  textGrob("Nonviolent Protest\n(DCA data)",
    gp = gpar(fontsize = 13, col = "grey20")
  )
text_v1 <-
  textGrob("Violent Protest\n(DCA data)",
    gp = gpar(fontsize = 13, col = "grey20")
  )
text_v2 <-
  textGrob("Violent Protest\n(Carter data)",
    gp = gpar(fontsize = 13, col = "grey20")
  )
# text_brace <- textGrob("{", gp = gpar(fontsize = 124))

p1 <- ggplot(plm_df) + 
    aes(x = label3, y = coef)

p2 <-
  p1 + labs(
    y = "Change in county-level Democratic vote share",
    x = ""
  )

p3 <- p2 + geom_hline(yintercept = 0, col = "darkgray")

p4 <-
  p3 + geom_linerange(
    aes(
      x    = label3,
      ymin = coef - se * interval1,
      ymax = coef + se * interval1
    ),
    lwd      = 1.3,
    position = position_dodge(width = 1 / 5),
    alpha    = .5
  )

p5 <-
  p4 + geom_linerange(
    aes(
      x    = label3,
      ymin = coef - se * interval2,
      ymax = coef + se * interval2
    ),
    position = position_dodge(width = 1 / 5),
    lwd      = .7,
    alpha    = .4
  )

p6 <-
  p5 + geom_point(
    aes(x = label3, y = coef),
    position = position_dodge(width = 1 / 5),
    size = 2.5,
    shape = 19,
    alpha = .6
  )

p7 <-
  p6 + theme(
    plot.title = element_text(size = 18),
    text = element_text(size = 18),
    axis.text.x = element_text(
      angle = 0,
      hjust = 0,
      vjust = 0
    )
  ) + coord_flip()



p8 <-
  p7 + 
    scale_color_discrete(guide = FALSE) +
    scale_x_discrete(labels = rev(plm_df$label4)) + 
    scale_y_continuous(breaks = seq(from = -10, to = 4, by = 2)) + 
    xlab(NULL) +
    theme_cowplot() +
    theme(
        panel.grid.major = element_line(size = .5, color = "grey90"),
        axis.text = element_text(size = 12),
        strip.text = element_text(size=12, lineheight=0.5),
        panel.grid.minor = element_line(size = .25, color = "grey90"),

        plot.title = element_text(size = 18),
        text = element_text(size = 18),
        axis.text.x = element_text(
            angle = 0,
            hjust = 0,
            vjust = 0
          ),
        plot.margin = unit(c(1, 1, 1, 11.25), "lines")
        )


## set offset for text and brackets


yoffset <- 0
textoffset <- -13.2
bracketoffset <- textoffset + 1.5

p8 <-
  p8 + annotation_custom(
    text_nv,
    xmin = 13, # which number on y axis for label
    xmax = 13,
    ymin = textoffset + yoffset,
    ymax = textoffset + yoffset
  ) +
  annotation_custom(
    text_v1,
    xmin = 8,
    xmax = 8,
    ymin = textoffset + yoffset,
    ymax = textoffset + yoffset
  ) +
  annotation_custom(
    text_v2,
    xmin = 3,
    xmax = 3,
    ymin = textoffset + yoffset,
    ymax = textoffset + yoffset
  )

offset <- 13
vert_line <- 2
sstem <- .2 # singlestem
dstem <- .3 # doublestem

p8 <- p8 +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset - vert_line,
    xmax = offset + vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset - vert_line,
    xmax = offset - vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + dstem + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset + vert_line,
    xmax = offset + vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + dstem + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset,
    xmax = offset,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset - sstem + yoffset
  )

offset <- 8
p8 <- p8 +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset - vert_line,
    xmax = offset + vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset - vert_line,
    xmax = offset - vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + dstem + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset + vert_line,
    xmax = offset + vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + dstem + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset,
    xmax = offset,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset - sstem + yoffset
  )

offset <- 3
p8 <- p8 +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset - vert_line,
    xmax = offset + vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset - vert_line,
    xmax = offset - vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + dstem + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset + vert_line,
    xmax = offset + vert_line,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset + dstem + yoffset
  ) +
  annotation_custom(
    grob = linesGrob(),
    xmin = offset,
    xmax = offset,
    ymin = bracketoffset + yoffset,
    ymax = bracketoffset - sstem + yoffset
  )

gt <- ggplot_gtable(ggplot_build(p8))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
grid.draw(gt)



## ---- diff.plot.function,  results='hide', fig.keep='none', include = FALSE  ----
  
# Specify the width of confidence intervals
interval1 <- -qnorm((1 - 0.9) / 2)  # 90 percent multiplier
interval2 <- -qnorm((1 - 0.95) / 2) # 95 percent multiplier

diff.plot <- function(dataframe, title, ylevels=FALSE) {
  p <- ggplot(dataframe, aes(x = labels, y = estimate)) +

    labs(title = paste(title, sep = ""), y = "Change in county-level Democratic vote share", x = "") +

    geom_hline(yintercept = 0, col = "darkgray") +

    geom_linerange(aes(x = labels, ymin = estimate - se * interval1, ymax = estimate + se * interval1), lwd = 1.3, position = position_dodge(width = 1 / 2), alpha = .5) +

    geom_linerange(aes(x = labels, ymin = estimate - se * interval2, ymax = estimate + se * interval2), lwd = .7, alpha = .4) +

    geom_point(aes(x = labels, y = estimate), size = 2.5, shape = 19, alpha = .6) # ,fill = "black" +

  theme(plot.title = element_text(size = 18), text = element_text(size = 18), axis.text.x = element_text(angle = 0)) #+ coord_flip() 

  if (length(ylevels) > 1) p <- p + scale_x_discrete(limits = (ylevels))

  return(p)
}
#



## ---- formulas_ols_iv_models, include = FALSE ----

form_lag <- pdem ~ p_effect + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag

form_lag_south <- pdem ~ p_effect + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2

form_match <- p_effect ~ locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2

# For Mahalanobis use form_match_maha due to singularities in matrix
# while matching. Inclue south in regression
form_match_maha <- p_effect ~ locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag


## ---- ols_models, include = FALSE ----

pdata_mlk_white <- pdata_mlk %>% 
  filter(per_black <= max_black_threshold)

ols68.unmatched <- lm(form_lag_south, data = pdata_mlk)

ols68.unmatched.white <- lm(form_lag_south, data = pdata_mlk_white)
# summary(ols68.unmatched)



maha.out <-
  matchit(
    form_match_maha,
    method = "nearest",
    distance = "mahalanobis",
    data = pdata_mlk %>% na.omit()
  )
maha.data <- match.data(maha.out)

# regression includes south
ols68.maha <- lm( form_lag_south, data = maha.data)

maha.out.white <-
  matchit(
    form_match_maha,
    method = "nearest",
    distance = "mahalanobis",
    data = pdata_mlk_white %>% na.omit()
  )
maha.data.white <- match.data(maha.out.white)

# regression includes south
ols68.maha.white <- lm( form_lag_south, data = maha.data.white)



mols68.cbps <- CBPS(
    form_match,
    id    = "fips",
    treat = "p_effect",
    ATT   = FALSE,
    data  = pdata_mlk %>% na.omit()
  )

weights.cbps <- mols68.cbps$weights
cbps.data <- cbind(pdata_mlk %>% na.omit(), weights.cbps)

ols68.cbps <- lm(form_lag_south, weights = weights.cbps, data = cbps.data)

mols68.cbps.white <- CBPS(
  form_match,
  id    = "fips",
  treat = "p_effect",
  ATT   = FALSE,
  data  = pdata_mlk_white %>% na.omit()
)

weights.cbps.white <- mols68.cbps.white$weights
cbps.data.white <- cbind(pdata_mlk_white %>% na.omit(), weights.cbps.white)

ols68.cbps.white <- lm(form_lag_south,
                       weights = weights.cbps.white,
                       data    = cbps.data.white)





## ---- view_mlk_linear_models, results = "asis", eval = FALSE ----

stargazer(
    ols68.unmatched,
    ols68.unmatched.white,
    ols68.maha,
    ols68.maha.white,
    ols68.cbps,
    ols68.cbps.white,
    digits    = 2,
    omit.stat = c("ser", "f", "adj.rsq"),
    type      = star_format
)


## ---- load_vc2, include = FALSE ----

# voting census carter dca and rain data.frame
load(here("data/voting_census_carter_rain_dca.Rdata"), verbose = TRUE)


## ---- iv_models_no_matching_updated ----

iv68.pre123.white <- ivreg(pdem ~ p_effect + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2  |  . - p_effect + prethreeday, data = pdata_mlk %>% filter(per_black <= max_black_threshold) )


iv68.week.white <- ivreg(pdem ~ p_effect + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2  |  . - p_effect + rain_week, data = pdata_mlk %>% filter(per_black <= max_black_threshold) )


iv68.month.placebo.white <- ivreg(pdem ~ p_effect + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag  +  south2   |  . - p_effect + rain_month_placebo, data = pdata_mlk %>% filter(per_black <= max_black_threshold) ) 




## ---- iv_table1, results = 'asis', eval = FALSE ----

stargazer(
    iv68.pre123.white, iv68.week.white, iv68.month.placebo.white,
    se   = list(
        rse(iv68.pre123.white), rse(iv68.week.white), rse(iv68.month.placebo.white)),
    type = star_format
    )



## ---- iv_cbps_matching_updated ----

form_match <- p_effect ~ locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2

match_cbps_out <- CBPS(
    form_match,
    id    = "fips",
    treat = "p_effect",
    ATT   = FALSE,
    data  = pdata_mlk %>% filter(per_black <= max_black_threshold) %>% na.omit()
)

dim(pdata_mlk  %>% filter(per_black <= max_black_threshold) %>% na.omit())
nrow(pdata_mlk %>% filter(per_black <= max_black_threshold) %>% na.omit())

weights_cbps <- match_cbps_out$weights
length(match_cbps_out$weights)

pdata_mlk_cbps  <- bind_cols(
    pdata_mlk %>% filter(per_black <= max_black_threshold) %>% na.omit(), 
    weights_cbps = weights_cbps
    )


iv68.pre123.match.white  <- ivreg(pdem ~ p_effect  + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2  |  . - p_effect + prethreeday, data = pdata_mlk_cbps, weights = weights_cbps)

iv68.week.match.white <- ivreg(pdem ~ p_effect  + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2 |  . - p_effect + rain_week, data = pdata_mlk_cbps, weights = weights_cbps)      

iv68.month.placebo.match.white <- ivreg(pdem ~ p_effect  + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2  |  . - p_effect + rain_month_placebo, data = pdata_mlk_cbps, weights = weights_cbps)


## ---- iv_calc_robust_models_updated, include = FALSE ----

se1.ivpre.white         <- robust.se(iv68.pre123.white)
se2.ivpre.match.white   <- robust.se(iv68.pre123.match.white)
se3.ivweek.white        <- robust.se(iv68.week.white)
se4.ivweek.match.white  <- robust.se(iv68.week.match.white)
se5.ivmonth.white       <- robust.se(iv68.month.placebo.white)
se6.ivmonth.match.white <- robust.se(iv68.month.placebo.match.white)


## ---- prep_iv_p_values, include = FALSE ----
# experiment in converting iv se to robust se for stargazer
# not called
iv68.week.white.robust.se <- iv68.week.white %>% robust.se() %>% suppressMessages() %>% broom::tidy() %>% slice(2) %>% pull(p.value)

iv68.week.match.white.robust.se <- iv68.week.match.white %>% robust.se() %>% suppressMessages() %>% broom::tidy() %>% slice(2) %>% pull(p.value)

## ---- iv_table2, results = 'asis', eval = FALSE ----
# not called, for interactive coding

stargazer(iv68.pre123.match.white, iv68.week.match.white, iv68.month.placebo.match.white,
          se    = list(rse(iv68.pre123.match.white), rse(iv68.week.match.white), rse(iv68.month.placebo.match.white)),
          type  = "text", 
          align = TRUE
)


## ---- iv_table3_prep, include = FALSE ----

models_list <- list(
    iv68.pre123.white,
    iv68.pre123.match.white,
    iv68.week.white,
    iv68.week.match.white,
    iv68.month.placebo.white,
    iv68.month.placebo.match.white
)


# rse function adds to robust.se by using broom::tidy and exporting bare se vector
se_list <- list(
    rse(iv68.pre123.white), 
    rse(iv68.pre123.match.white), 
    rse(iv68.week.white), 
    rse(iv68.week.match.white), 
    rse(iv68.month.placebo.white),
    rse(iv68.month.placebo.match.white)
) 

## ---- iv_table3, results = "asis" ----

covar_labels <- c(
    "Protest `Treatment'",
    "log(PC Local Gov Exp)",
    "\\% HS+ Educ",
    "\\% Black",
    "(\\% Black)$^2$",
    "Median Age",
    "Median Income (000s)",
    "\\% Unemployment",
    "\\% Urban",
    "log(Population)",
    "\\% Owner Occ Housing",
    "\\% Pop Growth",
    "\\% Pop Foreign",
    "Lagged Dem Vote Share",
    "South"
)

# requires remove_latex function loaded
covar_labels <- clean_covar_labels(covar_labels)

stargazer(
          models_list, 
          se               = se_list,
          align            = TRUE,
          float            = FALSE,
          star.cutoffs     = star.cut.vector,
          notes.append     = FALSE, 
          notes            = "*$p<0.05$",
          font.size        = 'scriptsize',
          dep.var.caption  = "DV: County-level Democratic Presidential Vote-share",
          dep.var.labels   = c(""),
          model.numbers    = TRUE,
          column.labels    = c("Placebo (Rain Apr 1-3)",  "Week (Apr 4-10)",  "Placebo (Apr 11-30)"),
          column.separate  = c(2, 2, 2),
          float.env        = "table",
          digits           = 2,
          title            = "",
          covariate.labels = covar_labels,
          omit             = "Constant",
          omit.stat        = c("ser", "f", "adj.rsq"),
          #type            = star_format,
          add.lines        = list(c(
              paste("County at least ", white_percent, "\\% white?", sep = ""),
              "\\textrm{Yes}", "\\textrm{Yes}", "\\textrm{Yes}", "\\textrm{Yes}", "\\textrm{Yes}", "\\textrm{Yes}"),
              c("Matching?", "\\textrm{No}", "\\textrm{Yes}", "\\textrm{No}", "\\textrm{Yes}", "\\textrm{No}", "\\textrm{Yes}")
          ) # add.lines
) # stargazer call  










## ---- rain.instrument.se.calc, include =  FALSE ----

    se1.ivpre.white         <- robust.se(iv68.pre123.white)
    se2.ivpre.match.white   <- robust.se(iv68.pre123.match.white)
    se3.ivweek.white        <- robust.se(iv68.week.white)
    se4.ivweek.match.white  <- robust.se(iv68.week.match.white)
    se5.ivmonth.white       <- robust.se(iv68.month.placebo.white)
    se6.ivmonth.match.white <- robust.se(iv68.month.placebo.match.white)



## ---- iv_stargazer_table, eval=FALSE, results = 'asis' ----
stargazer(
    se1.ivpre.white,
    se2.ivpre.match.white,
    se3.ivweek.white,
    se4.ivweek.match.white,
    se5.ivmonth.white,
    se6.ivmonth.match.white,
    type      = star_format,
    omit.stat = c("f"),
    digits    = 2
    )
## 

    
stargazer(
    iv68.pre123.white,
    iv68.pre123.match.white,
    iv68.week.white,
    iv68.week.match.white,
    iv68.month.placebo.white,
    iv68.month.placebo.match.white,
    type      = star_format,
    omit.stat = c("f", "ser"),
    digits    = 2
)
    

## ---- save.cbps, echo=FALSE  ----
# save(cbps.data, file=here("data", "cbpsdata.Rdata") )


## ---- choro2_calc, eval=TRUE  ----


## convert existing state and county variables to lowercase and strip off extra words
pdata_mlk$state  <- tolower(as.character(pdata_mlk$state))
pdata_mlk$county <- tolower(pdata_mlk$county)

#pdata_mlk$county <- sub("^(.*) (county|city|parish), ..$","\\1", pdata_mlk$county)
pdata_mlk$county <- sub("^(.*) (county|city|parish)","\\1", pdata_mlk$county)

pdata_mlk$county <- sub("^(.*)(\\/.*)","\\1", pdata_mlk$county) # remove second half of name/name

pdata_mlk <- pdata_mlk %>% 
    mutate(
        county = str_replace_all(county, "^saint ","st ")
    )

## create a key that matches the format used by the map package
pdata_mlk$mpname <- paste(pdata_mlk$state, pdata_mlk$county, sep=",")

## correct a few county name differences 
pdata_mlk$mpname[which(pdata_mlk$mpname=="florida,dade")] <- "florida,miami-dade"
pdata_mlk$mpname[which(pdata_mlk$mpname=="florida,okaloosa")] <- "florida,okaloosa:main"




# missing.counties <- base::setdiff(mp, pdata_mlk$mpname)
# l <- length(missing.counties)
# l


#x1small$mpname[which(x1small$mpname=="arizona,la paz")] <- "arizona,yuma"

#x1small <- rbind(x1small$mpname[which(x1small$mpname=="nevada,carson city")] )

# unresolved florida,okaloosa:
#x1small$mpname[which(x1small$mpname=="louisiana,st john the bapti")] <- "louisiana,st john the baptist"
#x1small$mpname[which(x1small$mpname=="louisiana,st martin")] <- "louisiana,st martin:north"
# unresolved louisiana,st martin:south
#x1small$mpname[which(x1small$mpname=="louisiana,vermilion")] <- "louisiana,vermilion"

## create vector of county names
mp <- maps::map("county", plot = FALSE, namesonly = TRUE)

#pdata_mlk$p_effect[pdata_mlk$per_black > max_black_threshold] <- 0

p_effect <- pdata_mlk$p_effect


# ## Calculate the riot effect of arrests
# riot.cols <- grep("_ARR", names(x9))
# riot.effect.arr.matrix <- x9[,riot.cols]
# p_effect <- x1small$p_effectMLK68

## Transform arrest data
#Cannonical
#riot.effect.arr <- sqrt(riot.effect.arr.matrix +1 )

#summary(p_effect)

## Convert p_effect into six sub-groups.  
#x1small$ri <- as.numeric(cut(p_effect,c(seq(min(p_effect), max(p_effect), length.out=6) ), include.lowest=TRUE )) # bins based on absolute measures

qnt.r <- unique(quantile(p_effect, probs=seq(0,1,length.out=7)) )
#qnt.r <- unique(quantile(p_effect, probs=seq(0,1,length.out=2)) )

pdata_mlk$ri <- as.numeric(cut(p_effect, qnt.r , include.lowest=TRUE )) # bins

#x1small$ri <- as.numeric(p_effect+1)
#hist(x1small$ri)


## clean up unmatched counties
# missing.counties <- base::setdiff(mp, pdata_mlk$mpname)
# l <- length(missing.counties)
# l
# missing <- pdata_mlk[1:l,]
# missing[1:l,] <- NA
# 
# missing$p_effect <- 0
# 
# missing$mpname <- missing.counties
# 
# 
# state.ri <- aggregate(pdata_mlk$ri, by=list(pdata_mlk$state), FUN="mean") 
# state.ri$x <- round(state.ri$x,0)
# #head(state.ri)
# 
# #sub("^(.*)(\\,.*)","\\1", pdata_mlk$mpname)
# missing$state <-  str_split_fixed(missing$mpname, ",", n=2)[,1]
# 
# mp <- match(missing$state, state.ri$Group.1)
# 
# missing$ri <- state.ri$x[missing.matches]
# 
# missing$ri[which(missing$mpname== "minnesota,mahnomen")] <- pdata_mlk$ri[grep("minnesota,becker", pdata_mlk$mpname)]
# 
# missing$ri[which(missing$mpname== "iowa,obrien")] <- pdata_mlk$ri[grep("iowa,clay$", pdata_mlk$mpname)]
# 
# pdata_mlk <- rbind(pdata_mlk, missing)




## ---- olswls.plot, fig.width=12, fig.height=5,  fig.pos="h!", fig.cap="OLS Models of Effect of Violent Protest in April 1968 on White Vote Share"  ----

# restore regular plotting function
#knit_hooks$set(plot = regular_plot)

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
    paste("\n\\end{kframe}\n\\begin{figure}[h!]\n",
          "\\includegraphics[width=\\maxwidth]{",
          opts_knit$get("base.url"), paste(x, collapse = "."),
          "}\n",
          "\\begin{minipage}{6in}\\small\\begin{flushleft}\\vspace*{2mm} \\textit{Note}: Coefficient plot of estimated effects of violent protest in April 1968 on change in white county-level Democratic vote share in presidential election in November 1968. No analysis of nonviolent protests is included as the DCA data records few nonviolent or violent protests in April 1968.\\end{flushleft}\\end{minipage}\\vspace*{2mm}",
          "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
          "\n\\end{figure}\n\\begin{kframe}\n",
          sep = '')
})
# King is assassinated April~4th, 1968.

olswls.df <-
    data.frame(
        labels = c(
            "1. All counties",
            paste0("2. ", white_percent, "% White"),
            "3. All counties",
            paste0("4. ", white_percent, "% White"),
            "5. All counties",
            paste0("6. ", white_percent, "% White")
        ),
        estimate = NA,
        se = NA,
        n = NA
    )

olswls.df[1, 2:4] <- c(summary(ols68.unmatched)$coefficients[2,1:2], 
                       nrow(ols68.unmatched$model) )
olswls.df[2, 2:4] <- c(summary(ols68.unmatched.white)$coefficients[2,1:2], 
                       nrow(ols68.unmatched.white$model) )
olswls.df[3, 2:4] <- c(summary(ols68.maha)$coefficients[2,1:2], 
                       nrow(ols68.maha$model) )
olswls.df[4, 2:4] <- c(summary(ols68.maha.white)$coefficients[2,1:2], 
                       nrow(ols68.maha.white$model) )
olswls.df[5, 2:4] <- c(summary(ols68.cbps)$coefficients[2,1:2], 
                       nrow(ols68.cbps$model) )
#olswls.df[3, 2:4] <- c(summary(ols68.cbps)$coefficients[2,1:2], nrow(ols68.cbps$model) )
olswls.df[6, 2:4] <- c(summary(ols68.cbps.white)$coefficients[2,1:2], 
                       nrow(ols68.cbps.white$model) )

#print(xtable(olswls.df))

olswls.df$labels <- paste0( olswls.df$labels, " (N=", add_comma(olswls.df$n), ")" )

ols.plot <- diff.plot(olswls.df, "", ylevels=rev(olswls.df$labels) )
ols.plot <- ols.plot + coord_flip() + labs(
            #title = "OLS Models of Violent Protest in April 1968 on Vote Share",
            y = "Change in county-level Democratic vote share",
            x = ""
        ) + theme(
            plot.title = element_text(size = 18),
            text = element_text(size = 18)
        )


ols.plot <- ols.plot + theme(plot.margin = unit(c(1, 1, 1, 11.5), "lines"))    #+ theme_apsr_grid
    
    text_ols  <- 
        textGrob("Ordinary Least\nSquares",
                 gp = gpar(fontsize = 13, col = "grey20"))
    text_maha  <- 
        textGrob("Mahalanobis\nMatching",
                 gp = gpar(fontsize = 13, col = "grey20"))
    text_cbps  <- 
        textGrob("Covariate Balanced\nPropensity Score",
                 gp = gpar(fontsize = 13, col = "grey20"))

    yoffset <- -6
    textoffset <- -1.5

ols.plot <- ols.plot + 
    annotation_bracket(xoffset=5.5, yoffset=yoffset, groupsize=2, singlestem=.04, doublestem=.05) +
    annotation_bracket(xoffset=3.5, yoffset=yoffset, groupsize=2, singlestem=.04, doublestem=.05) +
    annotation_bracket(xoffset=1.5, yoffset=yoffset, groupsize=2, singlestem=.04, doublestem=.05)

ols.plot  <- 
    ols.plot + 
    annotation_text(text_ols, xoffset=5.5, yoffset=yoffset, textoffset=textoffset) +
    annotation_text(text_maha, xoffset=3.5, yoffset=yoffset, textoffset=textoffset) + 
    annotation_text(text_cbps, xoffset=1.5, yoffset=yoffset, textoffset=textoffset)
 
    gt <- ggplot_gtable(ggplot_build(ols.plot))
    gt$layout$clip[gt$layout$name == "panel"] <- "off"
    grid.draw(gt)






## ---- ftests, echo=FALSE  ----
#For more, see: https://diffuseprior.wordpress.com/2013/09/23/detecting-weak-instruments-in-r/

lm.week.fs <- lm( p_effect ~  as.numeric(rain_week)  + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2 , data = cbps.data.white)

lm.week.fn <- lm( p_effect ~    locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2, data = cbps.data.white)

lm.week.fs.weights <- lm( p_effect ~  as.numeric(rain_week)  + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2 , data = cbps.data.white, weights = weights.cbps.white)

lm.week.fn.weights <- lm( p_effect ~    locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag + south2, data = cbps.data.white, weights = weights.cbps.white)

# simple F-test
ftest.simple <- waldtest(lm.week.fs, lm.week.fn)$F[2]
ftest.simple.weights <- waldtest(lm.week.fs.weights, lm.week.fn.weights)$F[2]

# F-test robust to heteroskedasticity
ftest.robusthetero <- waldtest(lm.week.fs, lm.week.fn, vcov = vcovHC(lm.week.fs, type="HC0"))$F[2]
ftest.robusthetero.weights <- waldtest(lm.week.fs.weights, lm.week.fn.weights, vcov = vcovHC(lm.week.fs.weights, type="HC0"))$F[2]

# F-test robust to clustering?
ftest.robustcluster <- waldtest(lm.week.fs, lm.week.fn, vcov = vcovHC(lm.week.fs, cluster="group"))$F[2]
ftest.robustcluster.weights <- waldtest(lm.week.fs.weights, lm.week.fn.weights, vcov = vcovHC(lm.week.fs.weights, cluster="group"))$F[2]


## ---- iv.plot1-1, fig.width=12, fig.height=5, fig.pos="h!", fig.cap="IV Models of Effect of Violent Protests in April 1968 on White Vote Share"  ----


# restore regular plotting function
#knit_hooks$set(plot = regular_plot)

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
    paste("\n\\end{kframe}\n\\begin{figure}[h!]\n",
          "\\includegraphics[width=\\maxwidth]{",
          opts_knit$get("base.url"), paste(x, collapse = "."),
          "}\n",
          "\\begin{minipage}{6in}\\small\\begin{flushleft}\\vspace*{2mm} \\textit{Note}: Coefficient plot of estimated effects of violent protest in April 1968 on change in county-level Democratic vote share in presidential election in November 1968. Models 1, 2, 5 and 6 are placebo tests that use rainfall during period with few or no protests. Models 2, 4 and 6 use CBPS weights to match. Protests data source: \\citet{carter1986black}. No analysis of nonviolent protests is included as the DCA data records few nonviolent or violent protests in April 1968.\\end{flushleft}\\end{minipage}\\vspace*{2mm}",
          "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
          "\n\\end{figure}\n\\begin{kframe}\n",
          sep = '')
})


iv.df <-  NA

iv.df  <- 
    data.frame(
    # labels = c(
    # "Apr 1-3, All counties\n(0% of protests)",
    # "Apr 1-3, 90% White\n(0% of protests)",
    # "Apr 4-10, All counties\n(95% of protests)",
    # "Apr 4-10, 90% White\n(95% of protests)",
    # "Apr 11-30, All counties\n(5% of protests)",
    # "Apr 11-30, 90% White\n(5% of protests)"
    # ),
    #labels = c(
    #"1. All counties",
    #"2. 90% White",
    #"3. All counties",
    #"4. 90% White"#,
    #"5. All counties",
    #"6. 90% White")
    labels = c(
      paste0("1. ", white_percent, "% White"),
      "2. Matched",
      paste0("3. ", white_percent, "% White"),
      "4. Matched",
      paste0("5. ", white_percent, "% White"),
      "6. Matched"
      ),
    estimate = NA,
    se = NA,
    n = NA
    )
    



prep_robust_coef <- function(robust_model, orig_model) {
   coef_se <- broom::tidy(robust_model)[2, 2:3]
   N <- nrow(orig_model$model)
   return(as.numeric(c(coef_se, N)))
}

iv.df[1, 2:4] <- prep_robust_coef(se1.ivpre.white, 
                                  iv68.pre123.white)
iv.df[2, 2:4] <- prep_robust_coef(se2.ivpre.match.white, 
                                  iv68.pre123.match.white)
iv.df[3, 2:4] <- prep_robust_coef(se3.ivweek.white, 
                                  iv68.week.white)
iv.df[4, 2:4] <- prep_robust_coef(se4.ivweek.match.white, 
                                  iv68.week.match.white)
iv.df[5, 2:4] <- prep_robust_coef(se5.ivmonth.white, 
                                  iv68.month.placebo.white)
iv.df[6, 2:4] <- prep_robust_coef(se6.ivmonth.match.white, 
                                  iv68.month.placebo.match.white)



iv.df$labels <- paste0( iv.df$labels, " (N=", add_comma(iv.df$n), ")" )


iv.plot <- diff.plot(iv.df[1:6, ], "", ylevels = rev(iv.df$labels))

iv.plot <- iv.plot + labs(
            #title = "IV Models of Effect of Violent Protests in April 1968 on Vote Share"),
            y = "Change in county-level Democratic vote share",
            x = ""
        )


iv1 <- iv.plot + coord_flip() +
    #theme_apsr_coef +
    theme_cowplot() +
    theme(
        panel.grid.major = element_line(size = .5, color = "grey90"),
        axis.text = element_text(size = 12),
        strip.text = element_text(size=12, lineheight=0.5),
        panel.grid.minor = element_line(size = .25, color = "grey90"),

        plot.title = element_text(size = 18),
        text = element_text(size = 18),
        axis.text.x = element_text(
            angle = 0,
            hjust = 0,
            vjust = 0
        ),
        plot.margin = unit(c(0, 3, 1, 10.25), "lines")
    )
    

iv2 <- iv1 + coord_flip()
    
    text_prot0  <- 
        textGrob("April 1-3\n(0% of protests)",
                 gp = gpar(fontsize = 13, col = "grey20"))
    text_prot95  <- 
        textGrob("April 4-10\n(95% of protests)",
                 gp = gpar(fontsize = 13, col = "grey20"))
    text_prot5  <- 
        textGrob("April 11-30\n(5% of protests)",
                 gp = gpar(fontsize = 13, col = "grey20"))

    yoffset    <- -20
    textoffset <- -2.5

sstem <- .4
dstem <- .4

iv2 <- iv2 + 
    annotation_bracket(xoffset = 5.5, yoffset = yoffset, groupsize = 2, singlestem = sstem, doublestem = dstem) +
    annotation_bracket(xoffset=3.5, yoffset=yoffset, groupsize=2, singlestem = sstem, doublestem = dstem) +
    annotation_bracket(xoffset=1.5, yoffset=yoffset, groupsize=2, singlestem = sstem, doublestem = dstem)

iv2  <- 
    iv2 + 
    annotation_text(text_prot0, xoffset=5.5, yoffset=yoffset, textoffset=textoffset) +
    annotation_text(text_prot95, xoffset=3.5, yoffset=yoffset, textoffset=textoffset) + 
    annotation_text(text_prot5, xoffset=1.5, yoffset=yoffset, textoffset=textoffset) 

gt <- ggplot_gtable(ggplot_build(iv2))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
grid.draw(gt)

## ---- mlk_counterfactual_sim_load_data, include = FALSE ----
    

y68vote <- read.csv(here("data/pres68_detailed_results3.csv"), header = TRUE) 
names(y68vote) <- tolower(names(y68vote))


## create data.frame with STATE and ST
st.codes <- read.csv(here("data/icpsr_states_fips_census.csv") )
names(st.codes) <- tolower(names(st.codes) )


## ---- mlk_counterfactual_sim, echo=FALSE, message=FALSE, warning=FALSE, error=FALSE, results='hide', fig.width=6 ----

st.codes$statefip <- str_pad(st.codes$statefip, width = 2, side = "left", pad = "0")

## Add ST field for merging and remove Hawaii, Alaska and DC
y68vote <- merge(y68vote, st.codes[ , 1:2], by.x = "state", by.y = "name")


## Estimate vote totals for each candidate 
pdata_mlk$pdemtot  <- (pdata_mlk$pttl * pdata_mlk$pdem  * .01) 
pdata_mlk$preptot  <- (pdata_mlk$pttl * pdata_mlk$prep  * .01) 
pdata_mlk$pwalltot <- (pdata_mlk$pttl * pdata_mlk$pwall * .01) 

## Aggregate county-level data to state-level
dem.actual  <- aggregate(pdata_mlk$pdemtot,  by = list(pdata_mlk$st), sum)
rep.actual  <- aggregate(pdata_mlk$preptot,  by = list(pdata_mlk$st), sum)
wall.actual <- aggregate(pdata_mlk$pwalltot, by = list(pdata_mlk$st), sum)

total       <- aggregate(pdata_mlk$pttl,     by = list(pdata_mlk$st), sum)

## Create vector of who wins each state

win.actual <- case_when(
    dem.actual$x > rep.actual$x & 
        dem.actual$x > wall.actual$x ~ "D",
    rep.actual$x > dem.actual$x & 
        rep.actual$x > wall.actual$x ~ "R",
    TRUE ~ "W"
)



## Create data frame of aggregated vote results
elec <-
    data.frame(
        st   = dem.actual$Group.1,
        dem  = dem.actual$x,
        rep  = rep.actual$x,
        wall = wall.actual$x,
        win.actual
    )




## Merge electoral data and aggregated vote data
ev <- merge(elec, 
            y68vote[, c("stateab", "state", "evn", "evh", "evw", "evtotal")], 
            by.x = "st", 
            by.y = "stateab")

#head(ev)

# sum(ev$evtotal[ev$win.cf=="D"])
# sum(ev$evtotal[ev$win.cf=="R"])
# sum(ev$evtotal[ev$win.cf=="W"])



## deterministic, no probability model for electoral college
## in counterfactual, estimated losses are added back to dems
## so negative shift is *subtracted* from dems (to add those votes back)
## and negative shift is added to rep to deduct from GOP
## then test is whether dem beats shifted GOP & beats Wallace
cf.det <- function(sh) { 
    #sh <- as.vector(unlist(sh))
    d.cf <- dem.actual$x - sh
    r.cf <- rep.actual$x + sh
    dem.wins <- which( d.cf > r.cf & 
                       d.cf > wall.actual$x )
    return(dem.wins)
}

## multinomial model for allocation of electoral votes
cf.multinom <- function(sh) {
    pdem <-  (dem.actual$x - sh) / total$x
    prep <-  (rep.actual$x + (1 * sh) ) / total$x
    pwall <-  (wall.actual$x  + (0 * sh)) / total$x
    rows <- nrow(dem.actual)
    #    print(cbind(pdem,prep,pwall))
    
    rmult <- apply(cbind(pdem,prep,pwall), 1, function(x) {rmultinom(1,1, x)} )
    
    dem.wins <- which(rmult[1,] == 1 )
        
    return(dem.wins)
}

cf.dir <- function(sh) {
    pdem <-  (dem.actual$x - sh) / total$x * 100
    prep <-  (rep.actual$x + (1 * sh) ) / total$x * 100
    pwall <-  (wall.actual$x + (0 * sh)) / total$x * 100
    #    rows <- nrow(dem.actual)
    #    print(cbind(pdem,prep,pwall))
    
    rdir <- apply(cbind(pdem,prep,pwall), 1, function(x) {colMeans(rdirichlet(ec.sims, x))} )
    
    winner <- apply(rdir, 2, which.max )
    
    dem.wins <- which(winner==1)
    
    return(dem.wins)
}



# violent protest effect
coef <- mean(c(plm.v.lag.white$coef[1], ols68.cbps.white$coef[2] ) ) 

ses  <- mean(c(broom::tidy(plm.v.lag.white)[1, "std.error"] %>% as.numeric(), 
               broom::tidy(ols68.cbps.white)[2, "std.error"] %>% as.numeric() ) ) # se.coef(ols68.maha)[2],  





st.sims <- 10000
ec.sims <- 500
    
r <- nrow(pdata_mlk)

elec.shift <- rnorm(r * st.sims, coef, ses) * .01

# create matrix, row = county, col = rnorm shift in vote
shift <- matrix(elec.shift, nrow = r, ncol = st.sims)

#dim(shift)


shift.ttl <- data.frame(st = pdata_mlk$st, 
                             pdata_mlk$pttl * shift * pdata_mlk$p_effect)

# delete large shift object
rm(shift)


## aggregate/sum all counties by state (group_by state, sum counties)
## result is state level shift total x 10000 simulations 
shift.agg <- aggregate(shift.ttl[ ,2:(st.sims + 1)], 
                       by = list(shift.ttl$st), sum)

#dim(shift.agg)

#shift.agg[1:5,1:4]

# with no shift (0), which states does dem candidate win
dem.actuals <- cf.det(0)

# with some shift, across 10K columns, what are the dem counterfactuals
dem.cfs <- apply(shift.agg[ , 2:(st.sims+1)], 2, cf.det)

#length(dem.cfs)

# extract sum of total electoral votes from counterfactual scenario
ev.cf <- function(states) { sum(ev$evtotal[states]) }

# for each of 10K counterfactuals, calculate sum of ev wins
evs <- unlist(lapply(dem.cfs, ev.cf))

# add HI and DC to ev total
evs <- evs + 7 

# identify subset of unique counterfactuals
scenarios <- unique(dem.cfs)
#length(scenarios)

#scenarios

# calculate total evs in counterfactual
cf.evs <- unlist(lapply(scenarios, ev.cf))
cf.evs <- cf.evs + 7 #HI + DC

# extract state gains over and above dem actuals for each scenario
cf.gains <- lapply(scenarios, function(x){base::setdiff(x, dem.actuals)} )
# cf.gains

# extract state names over and above dem actuals for each scenario
cf.names <- lapply(cf.gains, function(x){ev$st[x]} )
# cf.names
#length(cf.names)


## ---- mlk_counterfactual_stats, echo=FALSE, results='asis' ----

# identify modal scenario
mode <- as.numeric( names( which.max( table( evs ) ) ) )


# identify modal electoral total index
modal.ev <- which(cf.evs == mode)



# extract states gained in modal outcome
cf.states <- as.character(ev$state[unlist(cf.gains[modal.ev])])

# cf.states

l <- length(cf.states)

# before last state, add and
cf.states[l] <- paste("and ", as.character(cf.states[l]))

cf.states.text <- paste(cf.states, collapse=", ")

#cf.states.text

# calculate counterfactual total vote shift from rep to dem
vote.totals <- colSums(dem.actual$x - shift.agg[2:(st.sims+1)]) - colSums(rep.actual$x + shift.agg[2:(st.sims+1)])

# identify mean vote total (formerly modal)
mean.vote.total <- mean(vote.totals) 

credible.interval <- quantile(vote.totals, prob=c(.025, .975))

#setwd("~/Dropbox/diss/Prospectus")



## ---- counterfactual_summary, echo=FALSE, message=FALSE, warning=FALSE, error=FALSE, results='asis' ----
# cache.sim


evs.rank <- order(cf.evs)

cf.gains.states <-
    data.frame(
        lapply(cf.names, 
               function(x) {
                 paste(unlist(x), collapse = ", ")
    }))
cf.gains.states <- as.vector(unlist(cf.gains.states))
#print(cf.gains.states)

evs.freq  <- table(evs)
evs.match <- match( cf.evs, as.numeric(names(evs.freq)) )
evs.count <- as.vector(evs.freq)[evs.match]
evs.per   <- evs.count / st.sims * 100 

#print(evs.freq)
#print(evs.per)

evs.table <-
    data.frame(
        cf.gains.states,
        as.character(cf.evs),
        sapply(evs.count, add_comma),
        as.character(round(evs.per))
    )[evs.rank, ]

names(evs.table) <- c("Humphrey Gains", "# of Electoral Votes", "# of Outcomes", "% of Outcomes")

save(evs.table, file = here("data/evs_table.Rdata") )


xt.evs <-
    xtable(
        evs.table,
        hline.after = c(0),
        #align = c("l", "l", "r", "r", "r"),
        align = c(
            "p{0.25\\textwidth} ",
            "p{0.22\\textwidth} ",
            "R{0.20\\textwidth} ",   # R{} requires \newcolumntype in preamble
            "R{0.20\\textwidth} ",
            "R{0.20\\textwidth}"
            # "p{0.35\\textwidth}|"
            ),
        label = "tab:simulation_results",
        caption = paste(
            "Results of ",
            add_comma(st.sims),
            " counterfactual simulated elections",
            sep = ""
        )
    ) #floating=FALSE)

comment          <- list()
comment$pos      <- list()
comment$pos[[1]] <- c(nrow(evs.table))
comment$command  <- c(paste(" \n", sep = ""))


# only writes file, does not output to pdf
print(xt.evs,
    include.rownames = FALSE,
    add.to.row = comment,
    file = "table_simulation_results.tex",
    size = "\\small" )



## ---- table.evs, results='asis', echo=FALSE ----

comment          <- list()
comment$pos      <- list()
comment$pos[[1]] <- c(nrow(evs.table))
comment$command  <- c(paste(" \n", sep = ""))

#comment$pos[[1]] <- c(nrow(evs.table))
comment$command  <- c(paste0("\\hline \\\\[.05ex] \\multicolumn{4}{L{6in}}{
\\textit{Note}: Effect of violent protests on `treated' county-level Democratic vote share estimated at ",  round(coef,2), ".} " ) )

print(xt.evs,
    hline.after = c(0),
    include.rownames = FALSE,
    add.to.row = comment,
    size="\\small"
             )


## ---- choro_electoralvote_counter_calc, include = FALSE, eval = TRUE  ----


map.df <- ev[ ,c("st", "state", "win.actual")]

names(map.df) <- tolower(names(map.df) )

map.df$state <- tolower(map.df$state)

#head(map.df)

names(map.df) <- c("st", "region",  "win.actual")

dem.gain <- unlist(cf.names[[modal.ev]])
dem.gain.st <- which(map.df$st %in% dem.gain)

map.df$win.cf <- as.character(map.df$win.actual)
map.df$win.cf[dem.gain.st] <- "D2"

map.df$labels <- as.factor(as.character(car::recode(map.df$win.cf, "'D'='Humphrey Actual';'R'='Nixon';'W'='Wallace';'D2'='Humphrey Gains'") ) )


choro <- merge(gg_states, map.df, by.x="region", by.y="region")

choro <- choro[order(choro$order), ]



## ---- choro_electoralvote_counter,  fig.cap="Counterfactual choropleth map of the 1968 U.S. presidential election", echo=FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=14, eval=TRUE  ----

# cache.map

# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
    paste("\n\\end{kframe}\n\\begin{figure}\n",
          "\\includegraphics[width=\\maxwidth]{",
          opts_knit$get("base.url"), paste(x, collapse = "."),
          "}\n",
          "\\begin{minipage}{6in}\\small\\begin{flushleft} \\textit{Note}: Choropleth map of the 1968 U.S. presidential election with electoral votes allocated under the counterfactual scenario of King not being assassinated and 137 uprisings not occurring in the wake of his death. \\end{flushleft}\\end{minipage}",
          "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
          "\n\\end{figure}\n\\begin{kframe}\n",
          sep = '')
})

#\\vspace*{2mm}

# set up choropleth
Map <- qplot(
    long,
    lat,
    data   = choro,
    group  = group,
    fill   = win.cf,
    geom   = "polygon",
    xlab   = NULL,
    ylab   = NULL,
    colour = I("grey40"),
    lwd    = I(.5)
)

Map <- Map + labs(fill="win.cf")

Map <- Map + scale_x_continuous(breaks = NULL) +
    scale_y_continuous(breaks = NULL) +
    theme(
      panel.grid.major     = element_blank(),
      panel.grid.minor     = element_blank(),
      panel.background     = element_blank(),
      panel.border         = element_blank(),
      axis.ticks           = element_blank(),
      axis.line.x.bottom   = element_blank(),
      axis.line.y.left     = element_blank(),
      legend.title         = element_text(hjust = 0, face = "bold", size = 20),  
      legend.text          = element_text(size = 20),
      legend.justification = c(0, .5),
      legend.position      = c(.81, .25),
      plot.margin          = unit(c(0, 0, 0, 0), "cm")
      )

colors_map <- c("#001088BF", "#2166AC66", "#B2182BBB", "#EEEEEEEE")

Map + scale_fill_manual(
    values = colors_map,
    name   = "Candidates",
    labels = c("Humphrey", "Humphrey Gains", "Nixon", "Wallace")) +
    theme(aspect.ratio = .525) +
    xlab("") +
    ylab("")



## ---- load_all_data, eval=TRUE, include=FALSE ----

load(here("data/all_data_attica_congress_no_nyt.Rdata") )




## ---- trend_plot_setup, eval=TRUE  ----

gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}


# Viridis
colors_three <- viridis::viridis(begin = 0, end = .8, n = 3, alpha = .9, direction = 1)

txt_sz   <- 3.5
line_sz  <- 0.8


legend_custom <- function(ggplot_obj, label, xmin, ymax, yshift, txt_sz=4, line_sz=1.1, line_type = c("31", "solid", "61"), segment_length=220, cols){
    

    print(ggplot_obj +
              geom_shadowtext(data = data.frame(x = xmin, y = ymax - (yshift * 1), 
                                                label = label[1]), 
                              aes(y=y, x=x, label = label),
                              size=txt_sz, hjust=0, 
                              bg.color='white', bg.r = bg_ratio, col = 'black') +
              geom_shadowtext(data = data.frame(x = xmin, y = ymax - (yshift * 2), 
                                                label = label[2]), 
                              aes(y=y, x=x, label = label),
                              size=txt_sz, hjust=0,
                              bg.color='white', bg.r = bg_ratio, col = 'black') + 
              geom_shadowtext(data = data.frame(x = xmin, y = ymax - (yshift * 3), 
                                                label = label[3]), 
                              aes(y=y, x=x, label = label),
                              size=txt_sz, hjust=0,
                              bg.color='white', bg.r = bg_ratio, col = 'black') + 
              annotate("segment", 
                       x = xmin - segment_length, xend = xmin-10, 
                       y = ymax - (yshift * 1), yend = ymax - (yshift * 1), 
                       colour = cols[1], size = line_sz, linetype=line_type[1]) +
              annotate("segment", 
                       x = xmin - segment_length, xend = xmin-10, 
                       y = ymax - (yshift * 2), yend = ymax - (yshift * 2), 
                       colour = cols[2], size = line_sz, linetype=line_type[2]) +
              annotate("segment", 
                       x = xmin - segment_length, xend = xmin-10, 
                       y = ymax - (yshift * 3), yend = ymax - (yshift * 3), 
                       colour = cols[3], size = line_sz, linetype=line_type[3]) 
    )
    
}


## ---- protests_nonviolent_news_opinion_civil_rights, eval = TRUE, fig.width = 8.5, fig.height = 4.5, fig.pos = "h!", fig.cap = "Nonviolent protest activity, headlines \\& public opinion on `civil rights,' by month" ----
#cache.var

# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
    paste("\n\\end{kframe}\n\\begin{figure}[h!]\n",
          "\\includegraphics[width=\\maxwidth]{",
          opts_knit$get("base.url"), paste(x, collapse = "."),
          "}\n",
          "\\begin{minipage}{6in}\\small\\begin{flushleft}\\textit{Note}: Scatter plot with smoothed trend lines of nonviolent protest activity, news coverage of `civil rights' and public opinion about `civil rights' as the most important problem, aggregated by month. \\end{flushleft}\\end{minipage}",
          "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
          "\n\\end{figure}\n\\begin{kframe}\n",
          sep = '')
})


sp <- .075
#al <- .4

line_types <- c("31","solid", "61")
point_alpha <- 0.6

ad_wide <- all_data %>%
    filter(
        date >= as.numeric(as.Date("1960-01-01")) & 
        date <= as.numeric(as.Date("1972-12-31")) 
        ) %>%
    mutate(
        type_cat = paste(type, category, sep = "_"),
        value    = ifelse(type == "protests", log(value + 1), value) 
        )  %>%
    group_by(month, type_cat) %>%
    summarize(value = sum(value)) %>%
    ungroup() 

    
ad <- ad_wide %>%
    #mutate(key_cat = paste(type, category, sep="_") ) %>%
    spread(key = type_cat, value = value, fill = NA)


p <- ad %>%
    ggplot(aes(x = month ) ) +
    labs(y = "# Front page Headlines | Public Opinion (%)", x = "") + 
    scale_x_date(date_labels = "%Y", date_breaks = "year") +
    coord_cartesian(ylim = c(0, 60)) + 
    theme_apsr_grid
    


p <- p + 
    geom_point(aes(y = polls_civil_rights ), size = 1.5,  color = colors_three[3], alpha = point_alpha) +
    #geom_line(aes(month, polls_civil_rights ), linetype = "31", color = colors_three[3]) +

    geom_smooth(
        aes(y    = polls_civil_rights),
        method   ="loess",
        se       = FALSE,
        color    = colors_three[3],
        span     = .05,
        size     = lwt,
        linetype = "31"
    ) 

p <- p + 
    geom_point(aes(y = news_rights), 
               size = sz, shape = "h", color = colors_three[2], alpha = point_alpha) +
    geom_smooth(
        aes(y    = news_rights),
        method   ="loess",
        se       = FALSE,
        color    = colors_three[2],
        span     = sp,
        size     = lwt,
        linetype = 1 #"21"
    ) 


p <- p + geom_point(aes( y = protests_nonviolent / 10 ), 
                    size = sz, shape = "p", color = colors_three[1], alpha = point_alpha) +
    geom_smooth(
        aes(y    = protests_nonviolent / 10),
        method   = "loess",
        se       = FALSE,
        color    = colors_three[1],
        span     = sp,
        size     = lwt,
        linetype = "91"
    ) 


p <- p + geom_vline(
 	xintercept=as.numeric(c(as.Date("1960-11-08"), 
	as.Date("1964-11-03"), as.Date("1968-11-05"), 
	as.Date("1972-11-07") ) ), 
	linetype="13", alpha = .7, size = .75 )  + 
	geom_shadowtext(data = 
	              data.frame(polls_civil_rights = 56, 
	                         Date = as.Date("1961-02-01"), 
	                         label="Presidential Elections\n(1960-1972)"),
	          aes(y=polls_civil_rights, x=Date, label = label), 
	          col = 'black', bg.color = 'white', bg.r = bg_ratio, hjust=0, size=txt_sz) 

p <- p + scale_y_continuous(sec.axis = sec_axis(~.*10, name = "# Nonviolent protesters (log)"))

legend_custom(p, 
              label = c("Public opinion\nabout 'civil rights'",
                        "Headlines with\n'civil rights' (h)",
                        "Nonviolent protest\nparticipants (p)"), 
              xmin           = as.Date("1966-02-01"), 
              ymax           = 66, 
              yshift         = 10, 
              txt_sz         = txt_sz, 
              line_sz        = lwt, 
              line_type      = line_types,
              segment_length = 220, 
              cols           = colors_three[3:1] )





## ---- protests_violent_news_opinion_riots, eval=TRUE, fig.width=8.5, fig.height=4.5,  fig.pos="h!", fig.cap="Violent protest activity, headlines on `riots' and public opinion on `social control', by month." ----

#cache.var

# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
    paste("\n\\end{kframe}\n\\begin{figure}[h!]\n",
          "\\includegraphics[width=\\maxwidth]{",
          opts_knit$get("base.url"), paste(x, collapse = "."),
          "}\n",
          "\\begin{minipage}{6in}\\small\\begin{flushleft}\\textit{Note}: Scatter plot with smoothed trend lines of violent protest activity, news coverage of `riots' and public concern about `social control', aggregated by month. Violent protest activity in June 1967 and April 1968 exceeds 200 on arrests scale. For visualization, those two data points plotted at half of measured values. \\end{flushleft}\\end{minipage}",
          "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
          "\n\\end{figure}\n\\begin{kframe}\n",
          sep = '')
})

ad2 <- ad %>%
    dplyr::filter(month >= as.numeric(as.Date("1964-01-01")) & 
                  month <= as.numeric(as.Date("1972-12-31")) ) %>%
    mutate(protests_violent2 = 
               # two obs > 200 sum log arrests / month, halve for data visualization
               ifelse(protests_violent2 > 200,  protests_violent2 / 2, protests_violent2 ) ) %>%
    mutate(protests_violent2 = 
               # impute ~39 missing arrests from DCA data with 1/4th attendance
               ifelse(is.na(protests_violent2), protests_violent  / 4, protests_violent2 ) ) #%>%
    
    #mutate(protests_violent2 = protests_violent2 /2 )  %>%
    #mutate(protests_violent2 = ifelse(protests_violent2 < 1, NA, protests_violent2) )  %>%
    #mutate(news_riot = news_riot /2 ) #%>%


p <- ad2 %>% 
    ggplot(aes(x = month)) +
    labs(y="Public Opinion (%)", x="") + 
    #ggtitle("Violent protest activity, headlines on 'riots' and public opinion on 'social control', by month") +
        scale_x_date(date_labels="%Y", date_breaks="year") +
        coord_cartesian(ylim=c(0, 70)) + 
        theme_apsr_grid



p <- p  +  
        geom_point(aes(y=polls_social_control ), size = (sz - .4),  color = colors_three[3], alpha = point_alpha) +
        geom_smooth( 
        aes(y=polls_social_control),
        method="loess",
        se = FALSE,
        color = colors_three[3],
        span = .075,
        size = lwt,
        linetype = line_types[1]
    )

p <- p + 
    geom_point(aes(y=news_riot/2), size = (sz + 0), shape = "h", color = colors_three[2], alpha = point_alpha) +
    geom_smooth( 
        aes(y=news_riot/2),
        method="loess",
        se = FALSE,
        color = colors_three[2],
        span = sp,
        size = lwt,
        linetype = line_types[2]
    ) 


p <- p +  
    geom_point(aes(y=protests_violent2/2), size = (sz + 0), shape = "p", color = colors_three[1], alpha = point_alpha) +
    geom_smooth(
        aes(y=protests_violent2/2),
        method="loess",
        se = FALSE,
        color = colors_three[1],
        span = .075,
        size = lwt,
        linetype = line_types[3]
    ) 


p <- p + geom_vline(
 	xintercept=as.numeric(c(
	as.Date("1964-11-03"), as.Date("1968-11-05"), 
	as.Date("1972-11-07") ) ), 
	linetype="13", alpha = .7, size = .75 )  + 
	geom_shadowtext(data = 
	              data.frame(polls_civil_rights = 65, 
	                         Date = as.Date("1968-12-20"), 
	                         label = "Presidential Elections\n(1964-1972)"),
	          aes(y = polls_civil_rights, x = Date, label = label), 
	          hjust = 0, size = txt_sz, col = 'black', bg.r = .2, bg.color = 'white') 

p <- p + scale_y_continuous(
          sec.axis = sec_axis(~.*2, breaks = c(0, 40, 80, 120), 
                              name = "# Arrests (log) | # Front page Headlines "))   


legend_custom(p, 
              label = c("Public opinion\nabout 'social control'",
                       "Headlines with\n'riot' (h)",
                       "Violent protest\narrests (p)"), 
              xmin           = as.Date("1965-08-01"),
              ymax           = 75,
              yshift         = 10,
              txt_sz         = txt_sz,
              line_sz        = 1.0, 
              line_type      = line_types,
              segment_length = 170, 
              cols           = colors_three[3:1] )


## ---- congress_riots_violent_protests_plot_load_data, include = FALSE ----

load(here("data/congress_counts3_new.Rdata"), verbose = TRUE )

# read Carter violent protest data
carter_vp <- read_csv(here("data/Carter_data/carter_violent_protests_geocoded.csv")) 


## ---- congress_riots_violent_protests_plot,  fig.width=9, fig.height=4.5, fig.pos="h!", fig.cap="Violent Protest Arrests and Congressional Speech on `Riots,' 1964-1972, by Week",  warning=FALSE ----
#cachefalse
#cache.elite

# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
    paste("\n\\end{kframe}\n\\begin{figure}[h!]\n",
          "\\includegraphics[width=\\maxwidth]{",
          opts_knit$get("base.url"), paste(x, collapse = "."),
          "}\n",
          "\\begin{minipage}{6in}\\small\\begin{flushleft}\\textit{Note}: Line plot of weekly counts of Congressional speech using phrase ``riot,'' ``antiriot,'' and weekly, black-led violent protest arrests  from 1964 to 1972. Data sources: Congressional Record and \\citet{carter1986black}. \\end{flushleft}\\end{minipage}\\vspace{2mm}",
          "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
          "\n\\end{figure}\n\\begin{kframe}\n",
          sep = '')
})

# dropped reviewers request for ``unrest,'' ``attica,'' ``disorder,'' ``mayhem,'' or ``disturbance'' 


## coarsen date to week using relevant Sunday 
weekify <- function(dates) {
    as.Date(cut(dates,
                breaks = "week",
                start.on.monday = FALSE), 
            origin = "1970-01-01")
}

## coarsen date to month using first day of month 
monthify <- function(dates) {
    as.Date(cut(dates,
                breaks = "month"), 
            origin = "1970-01-01")
}

## coarsen date to quarter using first day of month 
quarterify <- function(dates) {
    as.Date(cut(dates,
                breaks = "quarter"), 
            origin = "1970-01-01")
}



periodize2 <- function(data, date, start = "1960-01-01", end = "1972-12-31", unit, method, func, verbose = FALSE) {
    
    df <- data.frame(date=date, data=data)
    df <- df[df$date >= as.Date(start, origin = "1970-01-01") & df$date <= as.Date(end), ]
    
    if(unit == "week") {
        df$date <- weekify(df$date)
    } else if(unit == "month") {
        df$date <- monthify(df$date)
    } else {
        df$date <- quarterify(df$date)
    }
    
    # establish lower and upper bounds guided by available data and/or overall bounds
    min.date <- max(as.Date(start), min(df$date) )
    max.date <- min(as.Date(end), max(df$date) )
    
    all.time <- seq(from=min.date, to=max.date, by=unit)
    missing.times <- as.Date(base::setdiff(all.time, df$date) )
    
    if(length(missing.times) > 0) { 
        df.miss <- data.frame(date=missing.times, data=NA)
        
        # combine original & missing 
        df <- rbind(df, df.miss)
    }
    
    # order data 
    df <- df[order(df$date), ]
    
    # create zoo object with duplicate dates combined by "func" either mean or sum
    dfz <- read.zoo(df, index="date", aggregate = func, na.rm=TRUE, drop=FALSE)
    names(dfz) <- "data"
    
    ## if first entry is NA, us na.locf but with first observation carried backward
    if(is.na(dfz$data[1])) dfz$data[1] <- na.locf(dfz$data, fromLast = TRUE)[1]
    
    
    # if method == number, replace NAs with single number
    if( is.numeric(method) ) {dfz[is.na(dfz$data)] <- method ; dfz2 <- dfz }
    
    # replace internal NAs with global mean
    if(method == "mean") {dfz$data[is.na(dfz$data) ] <- mean(dfz$data, na.rm=TRUE); dfz2 <- dfz}
    
    # replace internal NAs with linear interpolated data
    if(method == "linear") dfz2 <- na.approx(dfz)
    
    # replace internal NAs with cubic spline interpolated data
    if(method == "spline") dfz2 <- na.spline(dfz)
    
    # replace internal NAs with stineman interpolated data
    if(method == "stineman") {require("stinepack"); dfz$data <- na.stinterp(dfz$data); dfz2 <- dfz }
    
    
    less.than.zero <- which(dfz2$data < 0)
    dfz2$data[less.than.zero] <- 0
    
    # difference data to address stationarity
    dfz2$diff <- diff(dfz2$data)
    
    return(dfz2)
}


# ensure date format
carter_vp$date <- as.Date(carter_vp$date)


zoo.to.data.frame <- function(x, index.name="Date") {
    stopifnot(is.zoo(x))
    xn <- if(is.null(dim(x))) deparse(substitute(x)) else colnames(x)
    setNames(data.frame(index(x), x, row.names=NULL), c(index.name,xn))
}

arrests <-
    periodize2(
        data   = carter_vp$newarr,
        date   = carter_vp$date,
        start  = "1964-01-01",
        end    = "1972-01-01",
        unit   = "week",
        method = 0,
        func   = "sum"
    )

# deprecated
# congress_counts_by_day$date <- as.Date(paste(congress_counts_by_day$year, congress_counts_by_day$month, congress_counts_by_day$day, sep = "-"))

#stop("manual break")

counts <-
    periodize2(
        data   = congress_counts_by_day$riots_count,
        date   = congress_counts_by_day$date,
        start  = "1964-01-01",
        end    = "1972-01-01",
        unit   = "week",
        method = 0,
        func   = "sum"
    )

#periodize2(data=count$key[[1]]$count, date=count$key[[1]]$date,  start = "1964-01-01", end = "1972-01-01", unit = "week", method = 0, func = "sum")

m.data <- merge(counts, arrests)
#head(m.data)

d2 <- zoo.to.data.frame(x = m.data, index.name = "date")
#d3 <-  melt(d2[, c("date",  "data.counts", "data.arrests")], id.var = "date")
#head(d2)


# Viridis colors from earlier
#colors_four <- viridis::viridis(begin = 0, end = .9, n = 4, alpha = .9, direction = -1)
colors_two <- viridis::viridis(begin = .2, end = .85, n = 2, alpha = .9, direction = 1)
#colors_two <- colors_four[c(2,4)]

# guide=FALSE, disables KEY / LEGEND on right hand side
c <- ggplot(data = d2, aes(x=date) ) + 
    ylab("Count of Congressional use of 'riot' terms" ) + # | Violent protest arrests (10s)
    xlab("") + 
    #ggtitle("Violent Protest Arrests and Congressional Speech on 'Riots,' 1964-1972, by Week") +
    scale_y_continuous( labels=scales::comma, sec.axis = sec_axis(~.*10, name = "Violent Protest Arrests", labels=scales::comma)) + 
    scale_x_date(date_labels="%Y", date_breaks="year") +
    theme_apsr_grid


c <- c +
    geom_line(aes(y = data.counts), size = (lwt - 0.1), color = colors_two[2]) +
    geom_shadowtext(
        data = data.frame(x = as.Date("1966-03-01"),
                          y = 1650),
        aes(x = x, y = y, label = "Congressional\nuse of 'riots'"),
        bg.color = 'white',
        bg.r     = bg_ratio,
        col      = 'black',
        size     = tx,
        hjust    = 0
    )
#bg.color='white', bg.r = bg_ratio, col = 'black'

c <- c +
    geom_line(aes(y = data.arrests / 10), size = (lwt - 0.1), color = colors_two[1]) +
    geom_shadowtext(
        data = data.frame(x = as.Date("1968-05-10"),
                          y = 1650),
        aes(x = x, y = y, label = "Violent protest\narrests"),
        bg.color = 'white',
        bg.r     = bg_ratio,
        col      = 'black',
        size     = tx,
        hjust    = 0
    )

c

# old red col F8766D old light blue col 619CFF dark blue 3871D1

## ---- results = 'hide' ----
# restore regular plotting function
knit_hooks$set(plot = regular_plot)


## ---- granger.functions ----
## create function to take data, dates, return zoo object



periodize <- function(data, date, start = "1960-01-01", end = "1973-01-01", unit, method = "linear", func="sum", verbose = FALSE) {
    
    df <- data.frame(data=data, date=date)
    df <- df[df$date >= as.Date(start) & df$date <= as.Date(end), ]
    
    if(unit == "week") {
        df$period <- weekify(df$date) 
    } else if(unit == "month") {
        df$period <- monthify(df$date) 
    } else {
        #if(unit == "quarter")
        df$period <- quarterify(df$date)
    }
    
    df.agg <- aggregate(data ~ period, data=df, FUN=func, na.rm=TRUE)
    if(verbose) {print(head(df.agg)); print(dim(df.agg))}
    
    all.time <- seq(from=min(df.agg$period), to=max(df.agg$period), by=unit)
    if(verbose) {print(head(all.time)); print(length(all.time))}
    
    missing.times <- as.Date(base::setdiff(all.time, df.agg$period) )
    if(verbose) print(head(missing.times))
    
    # if there are missing periods, add in NAs
    if(length(missing.times) > 0) { 
        df.miss <- data.frame(period=missing.times, data=NA)
        
        # combine original & missing 
        df.all <- rbind(df.agg, df.miss)
    }
    else
    {df.all <- df.agg}
    
    # order data 
    df.all <- df.all[order(df.all$period), ]
    
    
    # create zoo time series object 
    # drop=FALSE keeps data in data.frame rather than dropping to vector
    # if drops to vector, na.approx doesn't work
    df.all <- read.zoo(df.all, drop=FALSE)
    if(verbose) print(head(df.all))
    
    # identify NA rows
    missing <- which(is.na(df.all$data) )
    
    # if method == number, replace NAs with single number
    if( is.numeric(method) ) df.all[missing] <- method
    
    # replace internal NAs with global mean
    if(method == "mean") df.all$data[missing] <- mean(df.all$data, na.rm=TRUE)
    
    # replace internal NAs with local mean # NOT IMPLEMENTED
    #if(method == "local") df.all[missing] <- mean(df.all, na.rm=TRUE)
    
    # replace internal NAs with interpolated data
    if(method == "linear") df.all <- na.approx(df.all)
    
    # replace internal NAs with cubic spline interpolated data
    if(method == "spline") df.all <- na.spline(df.all)

    # replace internal NAs with stineman interpolated data
    if(method == "stineman") df.all$data <- na.stinterp(df.all$data)


    # difference data to address stationarity
    df.all$diff <- diff(df.all$data)
    
    return(df.all)
}

#method = "spline", func="sum",




## ---- framing_load_packages_data, echo=FALSE, eval=TRUE  ----
#detach("package:plyr", unload=TRUE)

library(tidytext)
library(topicmodels)
library(purrr)

library(dplyr)


## ---- framing_lda_analysis_load_data, include = FALSE
load(here("data/newspapers_random_subsample_all_words_no_nyt.Rdata") )


load(here("data/newspapers_random_subsample_article_doc_topic.Rdata") )


## ---- framing_lda_analysis, eval=TRUE, results='hide' ----

#words
id_viold <- words %>%
    dplyr::select(id, viold) %>%
    distinct() %>%
    mutate()

art_v <- left_join(article_doc_topic, id_viold, by=c("document" = "id"))

art_v <- na.omit(art_v)

n_nv <- sum(art_v$viold==0) / lda_k
n_v <- sum(art_v$viold==1) / lda_k

art_v$viold <- as.factor(art_v$viold)

levels(art_v$viold) <- c(paste0("Articles about nonviolent protests (n=", add_comma(n_nv),")") , paste0("Articles about violent protests (n=", add_comma(n_v), ")" ) )

labels <- article_topics %>%
    group_by(topic) %>%
    top_n(15, beta) %>%
    arrange(topic, -beta) %>%
    ungroup() 

topic_label <- labels %>% 
    group_by(topic) %>%
    summarise(text=paste(term,collapse=', ')) 

linebreak <- function(text, width = 29) {
    text %>% 
        str_wrap(width) %>%
        paste(., collapse="\n")
}

new_labels <- map_chr(topic_label$text, linebreak)

new_labels <- paste0("Topic ", c(lda_k:1), ":\n[", new_labels, "]")



## ---- framing_tf_freq_analysis, eval=TRUE ----
#library(dplyr)

prot_words <- words %>%
    dplyr::count( viold, stem, sort = TRUE)  %>%
    group_by(viold) %>%
    mutate(total = sum(n) ) %>%
    mutate(tf = n/total ) %>%
    ungroup() %>%
    arrange(desc(n))

other_stop <- data.frame(stem = as.character(c("chicago", "alabama") ) )

prot_words <- anti_join(prot_words, other_stop )
    
stem_tf_nv <- prot_words %>%
    dplyr::filter(viold==0) %>%
    dplyr::select(stem, tf, n) %>%
    rename(tf0 = tf) %>%
    rename(n0 = n)


stem_tf_v <- prot_words %>%
    dplyr::filter(viold==1) %>%
    dplyr::select(stem, tf, n) %>%
    rename(tf1 = tf) %>%
    rename(n1 = n) 
    

stem_tf <- left_join(stem_tf_nv, stem_tf_v, by=c("stem" = "stem")) %>%
    mutate(ratio = (tf0/tf1) ) %>%
    mutate(log2_ratio = log2(tf0/tf1) ) 
    
#stem_tf_small <- stem_tf %>%
    #dplyr::filter(tf0 >= .003 | tf1 >= .002) %>%
    #dplyr::filter(n0 >= 3073 | n1 >= 850) %>%
    #dplyr::filter(n0 >= 2423 | n1 >= 622) 
    # dplyr::filter(n0 >= 2325 | n1 >= 584) 
      
stem_tf_nv_top <- stem_tf %>%
        top_n(n=35, n0)

stem_tf_v_top <- stem_tf %>%
        top_n(n=35, n1)

stem2 <- rbind(stem_tf_nv_top, stem_tf_v_top)
dupes <- which(duplicated(stem2$stem))
        
if(length(dupes)>0) stem2 <- stem2[-dupes, ]
    #dim(stem2)
    
#sum(stem2$log2_ratio < 0) # 15
#sum(stem2$log2_ratio > 0) # 17

#stem2_nv <- stem2 %>%
#        top_n(n=15, log2_ratio)

#stem2_v <- stem2 %>%
#        top_n(n=15, -log2_ratio)

#stem3 <- rbind(stem2_nv, stem2_v)
#stem_tf_small <- stem3         
              
stem2_nv <- stem2 %>%
        top_n(n=15, log2_ratio)

stem2_v <- stem2 %>%
        top_n(n=15, -log2_ratio)

stem3 <- rbind(stem2_nv, stem2_v)
stem_tf_small <- stem3 %>%
    distinct()

#diagnostics
# stem_tf_small %>% arrange(desc(log2_ratio)) %>% print(n=30)
# sum(stem_tf_small$log2_ratio < 0)
# sum(stem_tf_small$log2_ratio > 0)
    


## ---- framing_ratio_terms, eval=TRUE,  fig.height=6, fig.width=10, fig.pos="h!", fig.cap="Ratio of term frequencies in articles about protests coded as protester nonviolent or protester violent" ----
# cache.frame

# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# restore regular plotting function
knit_hooks$set(plot = regular_plot)

# Custom knitr hook to add notes to the plot
#knit_hooks$set(plot = function(x, options) {
#    paste("\n\\end{kframe}\n\\begin{figure}[h!]\n",
#          "\\includegraphics[width=\\maxwidth]{",
#          opts_knit$get("base.url"), paste(x, collapse = "."),
#          "}\n",
#          "\\begin{minipage}{6in}\\small\\begin{flushleft}\\vspace*{2mm}
#\\textit{Note}: Ratio of top stemmed term frequencies in articles human-coded as nonviolent or violent.\#\end{flushleft}\\end{minipage}\\vspace*{2mm}
#",
#          "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
#          "\n\\end{figure}\n\\begin{kframe}\n",
#          sep = '')
#})





p <- stem_tf_small %>%
    mutate(stem = reorder(stem, log2_ratio)) %>%
    arrange(desc(log2_ratio)) %>%
    ggplot(aes(stem, log2_ratio) ) +
    ylab(label = "Log2 ratio of term frequencies in articles (protester nonviolent / protester violent)") +
    xlab(label = "") +
    #ylab(label = "Ratio of term frequencies") +
    #Words with the greatest difference in beta between topic 2 and topic 1
    #ggtitle(label = "Ratio of term frequencies in articles about protests human-coded as nonviolent or violent") +
    geom_col(show.legend = FALSE) +
    #facet_wrap(~ viold, scales = "free") +
    #theme_apsr_grid + 
    #scale_y_continuous(trans=log2_trans()) +
    #coord_trans(y="log2") +
    coord_flip()

typesize <- 16

p <- p + theme(
       #plot.title = element_text(size = typesize),
       text = element_text(size = typesize) #,
       #axis.text.x = element_text(
       #         angle = 0,
       #         hjust = 0,
       #         vjust = 0
       #    )
     )

typeadjust <- -2

text_nv  <- 
        textGrob("Protesters nonviolent",
                 gp = gpar(fontsize = typesize + typeadjust , col = "grey20"), rot=90)

text_v  <- 
        textGrob("Protesters violent",
                 gp = gpar(fontsize = typesize + typeadjust , col = "grey20"), rot=90)

text_ylab  <- 
        textGrob("Top stemmed terms in articles coded as:",
                 gp = gpar(fontsize = typesize + typeadjust , col = "grey20"), rot=90)

p2 <- p + 
    #theme_apsr_grid +
    theme(
        panel.grid.major = element_line(size = .5, color = "grey90"),
        axis.text = element_text(size = 12),
        strip.text = element_text(size=12, lineheight=0.5),
        panel.grid.minor = element_line(size = .25, color = "grey90"),
        
        plot.margin = unit(c(1,1,1,4), "lines")   # Make room for the grob
    )

yshift            <- -2.32
yline_offset      <- .075
small_line_offset <- .04
miny              <- 1
maxy              <- nrow(stem_tf_small)
halfy             <- maxy / 2
midy              <- (maxy + 1) / 2
#span             <- (maxy / 2) + 1
bottom_min        <- miny
bottom_max        <- halfy
bottom_mid        <- (halfy + 1) / 2

upper_min <- halfy + 1
upper_max <- maxy
upper_mid <- (upper_min - 1) + ((halfy +1) / 2)


p3 <- p2 + annotation_custom(
            text_nv,
            xmin = upper_min,
            xmax = upper_max,
            ymin = yshift,
            ymax = yshift
        ) +
           annotation_custom(
            text_v,
            xmin = bottom_min,
            xmax = bottom_max,
            ymin = yshift,
            ymax = yshift
        ) +
           annotation_custom(
            text_ylab,
            xmin = midy - 2,
            xmax = midy + 2,
            ymin = yshift-.15,
            ymax = yshift-.15
        )

# long vertical lines
p4 <- p3 + annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_min,
            xmax =  bottom_max,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_min,
            xmax =  upper_max,
            ymin = yshift + yline_offset  ,
            ymax = yshift + yline_offset
            )
# short horizontal lines
p5 <- p4 + annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_min,
            xmax =  bottom_min,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset + small_line_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_max,
            xmax =  bottom_max,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset + small_line_offset
            ) + 
         annotation_custom(
            grob = linesGrob(),
            xmin =  upper_min,
            xmax =  upper_min,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset + small_line_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_max,
            xmax =  upper_max,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset + small_line_offset
            ) +
        annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_mid,
            xmax =  bottom_mid,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset - (small_line_offset/2) 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_mid,
            xmax =  upper_mid,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset - (small_line_offset/2)
            )


# Code to override clipping
gt <- ggplot_gtable(ggplot_build(p5))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
grid.draw(gt)


## ---- eval=FALSE ----
## add_comma(nrow(id_viold))
## sum(id_viold$viold==0)
## sum(id_viold$viold==1)


## ---- load_dca_for_nyt_models, include = FALSE ----
load(here("data/DCA_protest_data.Rdata"), verbose = TRUE)


## ---- nyt_poisson_models ----


dca <- protest.data %>%
    filter(igrp1c1 == 401 | igrp1c2 == 401 | 
               igrp2c1 == 401 | igrp2c2 == 401,
           evyy >= 1960 & evyy <= 1972)

dca <- dca %>%
    mutate(pr_st = case_when(
        viold == 0 & police4 == 0 ~ 1,
        viold == 0 & police4 == 1 ~ 2, 
        viold == 1 & police4 == 0 ~ 3,
        viold == 1 & police4 == 1 ~ 4
    )
    )

# updated for plot
dca <- dca %>%
    mutate(
      protester_violence = factor(viold),
      police_violence = case_when(
        police3 == 1    ~ 1,
        police4 == 1    ~ 2,
        TRUE            ~ 0
    ),
  state = factor(state1),
  year = factor(evyy),
  paragraph = paragrph
  )

# p1 <- glm(stories ~ police4 + viold + as.factor(state1) + as.factor(evyy), 
#           family = "poisson", data = dca) 
# p2 <- glm(paragrph ~ police4 + viold + as.factor(state1) + as.factor(evyy), 
#           family = "poisson", data = dca) 
# p3 <- glm(page ~ police4 + viold + as.factor(state1) + as.factor(evyy), 
#           family = "poisson", data = dca) 

p1 <- glm(stories ~ police_violence * protester_violence + year,
          family = poisson, data = dca) 

p2 <- glm(paragraph ~ police_violence * protester_violence + year, 
          family = poisson, data = dca) 

p3 <- glm(page ~ police_violence * protester_violence + year, 
          family = poisson, data = dca) 

plist <- list(p1, p2, p3)

# g1 <- glm(stories ~ pr_st + as.factor(state1) + as.factor(evyy), family = "poisson", data = dca) 
# g2 <- glm(paragrph ~ pr_st + as.factor(state1) + as.factor(evyy), family = "poisson", data = dca) 
# g3 <- glm(page ~ pr_st + as.factor(state1) + as.factor(evyy), family = "poisson", data = dca) 






## ---- nyt_marginal_effects_plots, echo = FALSE, fig.width=9, fig.height=3, fig.pos="H", fig.cap="Marginal Effects of Police and Protester Violence on \\textit{New York Times Coverage}", message = FALSE ----

# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# restore regular plotting function
knit_hooks$set(plot = regular_plot)

x_text <- .7

# deprecated colors_two based on colors_four
#colors_two <- colors_four[c(2,4)]
#colors_plot_model <- "black"
colors_plot_model <- colors_two

pl1 <- plot_model(
    p1,
    type   = "pred",
    terms  = c("police_violence", "protester_violence"),
    title  = "",
    colors = colors_plot_model
) +
    xlab("Police Violence") +
    ylab("Predicted # of Articles") +
    annotate(
        "text",
        label  = "Protesters\nviolent",
        x      = x_text + .1 ,
        y      = 2.6,
        size   = 4,
        colour = "grey29"
    ) +
    annotate(
        "text",
        label  = "Protesters\nnonviolent",
        x      = x_text + .9,
        y      = 1.1,
        size   = 4,
        colour = "grey29"
    ) +
    scale_x_continuous(#limits = c(0, 2),
        breaks = c(0, 1,  2),
        labels = c("Low", "", "High")) +
    scale_y_continuous(limits = c(.8, 3.7)) +
    theme_cowplot() +
    theme(legend.position = "none")


pl2 <-
    plot_model(
        p2,
        type   = "pred",
        terms  = c("police_violence", "protester_violence"),
        title  = "",
        colors = colors_plot_model
    ) +
    xlab("Police Violence") +
    ylab("Predicted # of Paragraphs") +
    annotate(
        "text",
        label  = "Protesters\nviolent",
        x      = x_text,
        y      = 13.8,
        size   = 4,
        colour = "grey29"
    ) +
    annotate(
        "text",
        label  = "Protesters\nnonviolent",
        x      = x_text + .75,
        y      = 8,
        size   = 4,
        colour = "grey29"
    ) +
    scale_x_continuous(breaks = c(0, 1,  2),
                       labels = c("Low", "", "High")) +
    theme_cowplot() +
    theme(legend.position = "none")


pl3 <-
    plot_model(
        p3,
        type   = "pred",
        terms  = c("police_violence", "protester_violence"),
        title  = "",
        colors = colors_plot_model
    ) +
    xlab("Police Violence") +
    ylab("Predicted Page #") +
    annotate(
        "text",
        label  = "Protesters\nviolent",
        x      = x_text,
        y      = 20,
        size   = 4,
        colour = "grey29"
    ) +
    annotate(
        "text",
        label  = "Protesters\nnonviolent",
        x      = x_text + .7,
        y      = 25.3,
        size   = 4,
        colour = "grey29"
    ) +
    scale_x_continuous(breaks = c(0, 1,  2),
                       labels = c("Low", "", "High   ")) +
    theme_cowplot() +
    theme(legend.position = "none")



cowplot::plot_grid(pl1, pl2, pl3, labels = 'AUTO', align = 'h', ncol = 3) 

theme_apsr <- theme_update(
      panel.grid.major = element_line(size = .5, color = "grey90"),
      strip.text = element_text(size=10, lineheight=0.5)
    ) 



## ---- nyt_marginal_effect_coef_plot, echo = FALSE ----

p_df <- list(NA) 

# extract average marginal effects
for (i in seq_along(1:3)) {
  p_df[[i]] <- plist[[i]] %>% 
      margins(model = ., 
              variables = c("police_violence", "protester_violence")) %>% 
      summary() %>% 
      slice(1:2) # police & protester violence 
  }

p2_df <- do.call(rbind, p_df) %>%
    mutate(labels = rep(c("Police violence", "Protester violence"), 3),
           outcome = rep(c(". Number of articles", ". Number of paragraphs", 
                           ". Page number"), each = 2) ) %>%
    rename(se = SE) %>%
    arrange(desc(labels) ) %>%
    mutate(outcome = paste0(1:6, outcome))


#p2_df

interval1 <- -qnorm((1 - 0.9) / 2) # 90 percent multiplier
interval2 <- -qnorm((1 - 0.95) / 2) # 95 percent multiplier


p <- ggplot(p2_df) + 
       aes(x = outcome, y = AME) +

    geom_hline(
      yintercept = 0, 
      col = "darkgray") +

  geom_linerange(aes(
    x = outcome,
    ymin = AME - se * interval1,
    ymax = AME + se * interval1
  ),
  lwd = 1.,
  position = position_dodge(width = 1 / 2),
  alpha = .5
  ) +

  geom_linerange(aes(
    x = outcome,
    ymin = AME - se * interval2,
    ymax = AME + se * interval2
  ),
  lwd = .5,
  alpha = .4
  ) +

  geom_point(aes(
      x = outcome, 
      y = AME),
    size = 2.2,
    shape = 19,
    alpha = .6
  ) + # ,fill = "black" +

    #theme_apsr_grid +
    coord_flip() +
    xlab("") +
    ylab("Change in media coverage") + 
    scale_x_discrete(limits = (rev(p2_df$outcome)))

p2 <- p + theme(plot.margin = unit(c(1,1,1,4), "lines"))   # Make room for the grob

yshift <-  -10.5
yline_offset <- .75
small_line_offset <- .1
miny <- 1
maxy <- nrow(p2_df)

halfy <- maxy / 2
midy <- (maxy + 1) / 2
#span <- (maxy / 2) + 1
bottom_min <- miny
bottom_max <- halfy
bottom_mid <- (halfy + 1) / 2

upper_min <- halfy + 1
upper_max <- maxy
upper_mid <- (upper_min - 1) + ((halfy +1) / 2)

typesize <- 13
typeadjust <- -2

text_top <- 
    textGrob("Protester \n violence",
                 gp = gpar(fontsize = typesize + typeadjust , 
                           col = "grey20"), 
             rot=90)

text_bottom <- 
        textGrob("Police \n violence",
                 gp = gpar(fontsize = typesize + typeadjust , 
                           col = "grey20"), 
                 rot=90)



p3 <- p2 + annotation_custom(
            text_top,
            xmin = upper_min,
            xmax = upper_max,
            ymin = yshift,
            ymax = yshift
        ) +
           annotation_custom(
            text_bottom,
            xmin = bottom_min,
            xmax = bottom_max,
            ymin = yshift,
            ymax = yshift
        ) 
           
# long vertical lines
p4 <- p3 + annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_min,
            xmax =  bottom_max,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_min,
            xmax =  upper_max,
            ymin = yshift + yline_offset  ,
            ymax = yshift + yline_offset
            )

# short horizontal lines
p5 <- p4 + annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_min,
            xmax =  bottom_min,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset + small_line_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_max,
            xmax =  bottom_max,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset + small_line_offset
            ) + 
         annotation_custom(
            grob = linesGrob(),
            xmin =  upper_min,
            xmax =  upper_min,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset + small_line_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_max,
            xmax =  upper_max,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset + small_line_offset
            ) +
        annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_mid,
            xmax =  bottom_mid,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset - (small_line_offset) 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_mid,
            xmax =  upper_mid,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset - (small_line_offset)
            )





# theme(
#   #plot.title = element_text(size = 18),
#   text = element_text(size = 18),
#   axis.text.x = element_text(angle = 0)
# ) + 
#     coord_flip()  

# Code to override clipping
gt <- ggplot_gtable(ggplot_build(p5))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
grid.draw(gt)


## ---- framing_case_2_vs_3_load_data, include = FALSE ----

load(here("data/framing-data-random_subsample_all_words_no_nyt_four_cases.Rdata")) 


## ---- framing_case_2_vs_3, fig.height=6, fig.width=10, fig.pos="h!", fig.cap="Ratio of term frequencies in articles (protesters nonviolent and state violent / protesters violent and state nonviolent)"  ----


# cache.frame

# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# restore regular plotting function
knit_hooks$set(plot = regular_plot)



pr_st <- words %>%
    mutate(ps2 = case_when(ps_type == 1 ~ as.numeric(NA),
                           ps_type == 2 ~ 1, #as.numeric(NA), #1,
                           ps_type == 3 ~ 2,
                           ps_type == 4 ~ as.numeric(NA), #2,
                           TRUE ~ as.numeric(NA)) ) %>%
    filter(stem != "negro",
           stem != "citi"
           ) %>%
    count( ps2, stem, sort = TRUE)  %>%
    group_by(ps2) %>%
    mutate(total = sum(n) ) %>%
    mutate(tf = n/total ) %>%
    ungroup() %>%
    arrange(desc(n))


# pr_st <- pr_st %>%
#     anti_join(geo_stop_words) %>%
#     anti_join(data_frame(stem = c("lou_sil"))) # louisville


stem_tf_12 <- pr_st %>%
    filter(ps2 == 1) %>%
    dplyr::select(stem, tf, n) %>%
    rename(tf12 = tf) %>%
    rename(n12 = n) %>%
    arrange(tf12) %>%
    top_n(60) %>%
    identity()

stem_tf_34 <- pr_st %>%
    filter(ps2 == 2 ) %>%
    dplyr::select(stem, tf, n) %>%
    rename(tf34 = tf) %>%
    rename(n34 = n) %>%
    arrange(tf34) %>%
    top_n(60) %>%
    #filter(n34 > quantile(n34, .99)) %>%
    identity()

stem_tf <- inner_join(stem_tf_12, stem_tf_34, by=c("stem" = "stem")) %>%
    mutate(ratio = (tf12/tf34) ) %>%
    mutate(log2_ratio = log2(tf12/tf34) ) 

#dim(stem_tf)
l <- nrow(stem_tf)
n <- 15

typesize <- 16

p <- stem_tf %>%
    #arrange(log2_ratio) %>%
    filter(ratio <= .95 | ratio >= 1.05) %>% # filter out 1 or near equal ratios
    arrange(desc(ratio)) %>%
    slice(c(1:n, (l-n-2):l )) %>%
    #slice(c(1:30)) %>%
    mutate(stem = reorder(stem, ratio)) %>%
    ggplot(aes(stem, log2_ratio) ) +
    geom_col(show.legend = FALSE) +
    xlab(NULL) +
    ylab(label = "Log2 ratio of term frequencies (protesters nv & state v / protesters v & state nv)") +
    #ggtitle(label = "Ratio of term frequencies between articles coded as nonviolent or violent protests") +
    coord_flip()


# p <- p + 
#      )

typeadjust <- -2

text_pnv_sv  <- 
        textGrob("Protesters nv and state v",
                 gp = gpar(fontsize = typesize + typeadjust , col = "grey20"), rot=90)

text_pv_snv  <- 
        textGrob("Protesters v and state nv",
                 gp = gpar(fontsize = typesize + typeadjust , col = "grey20"), rot=90)

text_ylab  <- 
        textGrob("Top stemmed terms in articles coded as:",
                 gp = gpar(fontsize = typesize + typeadjust , col = "grey20"), rot=90)

p2 <- p + 
    #theme_apsr_grid +
    theme(
        panel.grid.major = element_line(size = .5, color = "grey90"),
        axis.text = element_text(size = 12),
        strip.text = element_text(size=12, lineheight=0.5),
        panel.grid.minor = element_line(size = .25, color = "grey90"),
        
        text = element_text(size = typesize),
        
        plot.margin = unit(c(1,1,1,4), "lines")  # Make room for the grob
        ) 

yshift <- -2.8
yline_offset <- .075
small_line_offset <- .04
miny <- 1
maxy <- 30 # nrow(stem_tf)
halfy <- maxy / 2
midy <- (maxy + 1) / 2
#span <- (maxy / 2) + 1
bottom_min <- miny
bottom_max <- halfy
bottom_mid <- (halfy + 1) / 2

upper_min <- halfy + 1
upper_max <- maxy
upper_mid <- (upper_min - 1) + ((halfy +1) / 2)


p3 <- p2 + annotation_custom(
            text_pnv_sv,
            xmin = upper_min,
            xmax = upper_max,
            ymin = yshift,
            ymax = yshift
        ) +
           annotation_custom(
            text_pv_snv,
            xmin = bottom_min,
            xmax = bottom_max,
            ymin = yshift,
            ymax = yshift
        ) +
           annotation_custom(
            text_ylab,
            xmin = midy - 2,
            xmax = midy + 2,
            ymin = yshift-.15,
            ymax = yshift-.15
        )

# long vertical lines
p4 <- p3 + annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_min,
            xmax =  bottom_max,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_min,
            xmax =  upper_max,
            ymin = yshift + yline_offset  ,
            ymax = yshift + yline_offset
            )
# short horizontal lines
p5 <- p4 + annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_min,
            xmax =  bottom_min,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset + small_line_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_max,
            xmax =  bottom_max,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset + small_line_offset
            ) + 
         annotation_custom(
            grob = linesGrob(),
            xmin =  upper_min,
            xmax =  upper_min,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset + small_line_offset 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_max,
            xmax =  upper_max,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset + small_line_offset
            ) +
        annotation_custom(
            grob = linesGrob(),
            xmin =  bottom_mid,
            xmax =  bottom_mid,
            ymin = yshift + yline_offset,
            ymax = yshift + yline_offset - (small_line_offset/2) 
            ) +
            annotation_custom(
            grob = linesGrob(),
            xmin =  upper_mid,
            xmax =  upper_mid,
            ymin = yshift + yline_offset ,
            ymax = yshift + yline_offset - (small_line_offset/2)
            )


# Code to override clipping
gt <- ggplot_gtable(ggplot_build(p5))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
grid.draw(gt)




## ---- detach_tidyverse , echo=FALSE ----
detach("package:topicmodels", character=TRUE, unload=TRUE)
detach("package:tidytext",    character=TRUE, unload=TRUE)
detach("package:purrr",       character=TRUE, unload=TRUE)


## ---- reload_lmtest ----
library("lmtest")


## ---- granger_calc_load_data, include = FALSE ----
# load congress corpus civil rights and social control indices    
load(here("data/congress_counts3_new.Rdata") )

# read polls data
polls.gr <- read.csv(here("data/polls1950-1979-date.csv") )

# read Carter violent protest data
carter_vp <- read_csv(here("data/Carter_data/carter_violent_protests_geocoded.csv"))

# load DCA protest data
load(here("data/DCA_protest_data.Rdata"), verbose = TRUE )


## ---- granger_calc, echo = FALSE, warning = FALSE, error = FALSE ----

names(polls.gr) <- tolower(names(polls.gr)) 

polls.gr$date <- as.Date(polls.gr$date, format="%m/%d/%Y")

# divide by 100
polls.gr[,c("foreign.affairs", "economic", "social.control", "civil.rights")] <- polls.gr[,c("foreign.affairs", "economic", "social.control", "civil.rights")]*.01

# ensure date format for variable
carter_vp$date <- as.Date(carter_vp$date)


# which dates are missing
miss <- which(is.na(protest.data$event_date))

# if report day is missing, insert 1
protest.data$rptdd[is.na(protest.data$rptdd)] <- 1

# create alternate date using report dates
protest.data$ed2 <- ymd(paste(protest.data$rptyy, protest.data$rptmm, protest.data$rptdd, sep="-"))

# for missing event_dates, replace with report date
protest.data$event_date[miss] <- protest.data$ed2[miss]

# remove any remaining missing dates
miss <- which(is.na(protest.data$event_date))
if(length(miss) > 0) protest.data <- protest.data[-miss, ]





# set global parameters for imputing missing data

period.unit <- "month"

protest.method <- 0

elite.method <- "linear"

start <- "1960-01-01"
end   <- "1972-12-31"

opinion.method <- "spline"

## mean of both polls.gr$civil.rights and polls.gr$social.control is ~.15





cr.mon <-
    periodize2(
        data = polls.gr$civil.rights,
        date = polls.gr$date,
        unit = period.unit,
        method = opinion.method,
        func = "mean"
    )
#plot(cr.mon$data)

sc.mon <-
    periodize2(
        data = polls.gr$social.control,
        date = polls.gr$date,
        unit = period.unit,
        method = opinion.method,
        func = "mean"
    )
#plot(sc.mon$data)


# prepare nonviolent data
nv <- protest.data[protest.data$igrp1c1==401 & protest.data$v2==0, c("part3", "event_date")] 

nv <- na.omit(nv)
nv$part3 <- nv$part3 / 100000

nv.mon <-
    periodize2(
        data = nv$part3,
        date = nv$event_date,
        unit = period.unit,
        method = protest.method,
        func = "sum"
    )
#plot(nv.mon$data)

# prepare violent protest data
vO <- protest.data[protest.data$igrp1c1==401 & protest.data$v2==1, c("part3", "event_date")]

vO <- na.omit(vO)
vO$part3 <- vO$part3 / 100000
#print(head(vO))

vO.mon <-
    periodize2(
        data = vO$part3,
        date = vO$event_date,
        unit = period.unit,
        method = protest.method,
        func = "sum"
    )
#plot(vO.mon$data)
#print(head(vO.mon))

carter_vp$newarr <- carter_vp$newarr / 100000

vC.mon <-
    periodize2(
        data   = carter_vp$newarr,
        date   = carter_vp$date,
        unit   = period.unit,
        method = protest.method,
        func   = "sum"
    )
#plot((vC.mon$data))






p.mon <- merge(nv.mon, vO.mon, vC.mon)
#head(p.mon)

all.data <- merge(cr.mon, sc.mon, p.mon)
#head(all.data)



sc_index   <- congress_counts_by_day %>% 
    mutate(term = "social control index") %>% 
    select(term, date, social_control_count) %>% 
    rename(count = social_control_count)
    
cr_index   <- congress_counts_by_day %>% 
    mutate(term = "civil rights index") %>% 
    select(term, date, rights_count) %>% 
    rename(count = rights_count)

riot_index <- congress_counts_by_day %>% 
    mutate(term = "riot or antiriot") %>% 
    select(term, date, riots_count) %>% 
    rename(count = riots_count)

# periodize
elite.method <- "linear"

elite.control <-
    periodize2(
        data = sc_index$count,
        date = sc_index$date,
        unit = period.unit,
        method = elite.method,
        func = "sum"
    )
#plot(elite.control$data)

elite.civil <-
    periodize2(
        data = cr_index$count,
        date = cr_index$date,
        unit = period.unit,
        method = elite.method,
        func = "sum"
    )
#plot(elite.civil$data)

elite.riot <-
    periodize2(
        data = riot_index$count,
        date = riot_index$date,
        unit = period.unit,
        method = elite.method,
        func = "sum"
    )
#plot(elite.riot$data)

all2 <- merge(all.data, elite.civil, elite.control, elite.riot)




diffs <- grep("diff", names(all2) , value = TRUE)



all.nocr <- all2

all.approx <- na.approx(all.nocr)




## ---- reload_libraries, include = FALSE ----
opts_chunk$set(echo=FALSE)

library(lmtest)
library(tidyr)
library(dplyr)
library(stringr)
library(imputeTS)
library(xtable)
library(rlang)
library(forecast)
library(lubridate)
library(purrr)


## ---- load_data_granger ----

load(here("data/all_data_attica_congress_no_nyt.Rdata") )


## ---- load_granger_functions, echo=FALSE ----

# From https://stackoverflow.com/questions/31459651/converting-date-to-monday-of-that-week
weekify <- function(x){
        7 * floor(as.numeric(x-1+4) / 7) + as.Date(1-4,origin = "1970-01-01")
    }



all_data$week <- weekify(all_data$date)

monthify <- function(dates) {as.Date(cut(dates,
                                         breaks = "month")) }


dataprep <- function(data, var1, var2, start, end, period = date, filler=0, verbose=FALSE){
    
    require(rlang)
    
    period_quo  <- enquo(period)        # convert contents of term to usable object
    period_name <- quo_name(period_quo) # convert object to string
    
    if(verbose==TRUE) print(period_quo)
    if(verbose==TRUE) print(period_name)
    
    data_wide <- data %>%
    dplyr::filter(date >= as.Date(start) & 
           date <= as.Date(end) 
           ) %>%
    dplyr::filter(type_cat == var1 | 
           type_cat == var2
               ) %>%
    group_by(!!period_quo, type_cat) %>%
    rename(period = !!period_quo) %>%
    summarize(value = sum(value)) %>%
    ungroup() 
    
    days <- seq(from=as.Date(start), 
                to=as.Date(end), 
                by="day")
    
    if(period_name=="week") days <- weekify(days)
    if(period_name=="month") days <- monthify(days)

    l <- length(days)
    #print(l)

    #var1_short <- var1 #str_match(var1, "([a-z]*)_" )[2]
    #var2_short <- var2 #str_match(var2, "([a-z]*)_" )[2]

    all_days <- data_frame(
        period = rep(days, 2), 
        type_cat=as.factor(rep(c(var1, var2), each=l)), 
        value=filler)
    
    if(verbose==TRUE) print(head(all_days))
    if(verbose==TRUE) print(head(data_wide))

    #print(xtable(head(tbl_df(all_days))))
   
    data_wide <- data_wide %>%
        rbind(all_days) %>%
        group_by(period, type_cat) %>%
        summarize(value = sum(value)) %>%
        ungroup()

    data2 <- data_wide %>%
        #mutate(key_cat = paste(type_cat, category, sep="_") ) %>%
        spread(key=type_cat, value=value, fill=filler)
    
    return(data2)

}


gt2 <- function(data, order = 1, verbose=FALSE) {
    
    nms <- names(data)
    var1 <- nms[2]
    var2 <- nms[3]
    #print(nms)
    #print(var1)
    #data2 <- as.data.frame(apply(data[,2:3], MARGIN = 2, FUN = diff))
    
    #print(head(data2))
    gr_out <- list(vars = c(var1, var2), g1 = NA, g2 = NA)
    gr_out$g1 <- eval(parse(text=paste0("grangertest(", var1,  "~", var2 ,", order = ", order, ", data = data)" ) ) ) 
    
    gr_out$g2 <- eval(parse(text=paste0("grangertest(", var2,  "~", var1 ,", order = ", order, ", data = data)" ) ) ) 
    return(gr_out)
}


nameify <- function(x){
    x <- x %>%
        str_replace_all("diff.cr.mon|[d]*polls_civil_rights", "public opinion about civil rights") %>%
        str_replace_all("diff.sc.mon|[d]*polls_social_control", "public opinion about `social control'") %>%
        str_replace_all("diff.nv.mon|[d]*protests_nonviolent", "nonviolent protest activity") %>%
        str_replace_all("diff.vO.mon|[d]*protests_violent$", "violent protest activity (DCA data)") %>%
        str_replace_all("diff.vC.mon|[d]*protests_violent2$", "violent protest activity (Carter data)") %>%
        str_replace_all("diff.elite.civil", "elite discourse about civil rights") %>%
        str_replace_all("diff.elite.control", "elite discourse about crime and `riots'") %>%
        str_replace_all("diff.elite.riot", "elite discourse about `riots'") %>%
        str_replace_all("[d]*news_rights", "front page news about civil rights") %>%
        str_replace_all("[d]*news_riot", "front page news about `riots'") %>%
        str_replace_all("[d]*congress_rights", "Congressional speech about rights") %>%
        str_replace_all("[d]*congress_riot", "Congressional speech about `riots'") 
    return(x)
}


caption2 <- function( gr_obj, order = 1) {
    var1 <- gr_obj$vars[1]
    var2 <- gr_obj$vars[2]
    
    period_unit <- function(gt_obj) {
        case_when(
        gt_obj[[1]][1] <= 156 ~ "month",
        gt_obj[[1]][1] > 156 & gt_obj[[1]][1] <= 679 ~ "week",
        gt_obj[[1]][1] > 679  ~ "day"
    )
    }

    period_unit <- as.character(
        period_unit(gr_obj[[2]][1,1]))
    #period_unit2 <- as.character(
        #period_unit(gr_obj[[3]][1,1]))

    captions <- NA

    if(gr_obj$g1[[4]][2] <= 0.05) psentence1 <- "We reject the null hypothesis" else
        psentence1 <- "We fail to reject the null hypothesis"

    if(gr_obj$g2[[4]][2] <= 0.05) psentence2 <- "We reject the null hypothesis" else
        psentence2 <- "We fail to reject the null hypothesis"
    
    captions <- c(" ", " ")
    captions[1] <- paste0("Does ", nameify(var2), 
                          " Granger-cause ", nameify(var1) ,
                          "? ", psentence1, " (", p_val(gr_obj$g1[[4]][2]),
                          ", lag of", 
                          c(" one ", " two ", " three ")[order], 
                          paste0(period_unit, c("", "s", "s")[order]),").")

    captions[2] <- paste0("Does ", nameify(var1), 
                          " Granger-cause ", nameify(var2) ,
                          "? ", psentence2, " (", p_val(gr_obj$g2[[4]][2]),
                          ", lag of", 
                          c(" one ", " two ", " three ")[order], 
                          paste0(period_unit, c("", "s", "s")[order]),").")
    
    return(captions)
}


table2 <- function(gr_obj, order = order, verbose=FALSE){
    
    #if(is.na(print[1]) ) print <- 1:(length(gr_obj)-1)
    captions <- caption2(gr_obj, order = order)
    
    if(verbose==TRUE) print(captions)

    print(xtable(gr_obj$g1, 
             caption = captions[1]), 
             size="\\small"#, 
             #table.placement = getOption("xtable.table.placement", "H"), 
             #floating=FALSE
             ) 
    print(xtable(gr_obj$g2, caption = captions[2]), size="\\small" )

    # for (i in print){
    #     
    #     print(xtable(g[[1]][[i]], caption = g[[2]][[i]]), size="\\small" )
    #     
    # }
}

# set to none as tseries::adf.tests suggested no problems with stationarity
gr_kpss <- function(data, ndiff="none", verbose=FALSE) { 
    
    kpss <- NA
    kpss[1] <- ndiffs(data[[2]], alpha=0.05, test="pp")
    kpss[2] <- ndiffs(data[[3]], alpha=0.05, test="pp")

    if(verbose==TRUE) print(paste(names(data)[2], kpss[1] ) )
    if(verbose==TRUE) print(paste(names(data)[3], kpss[2] ) )

    nms <- names(data)
    nms2 <- c(nms, paste0("d",nms[2]), paste0("d",nms[3]))
    
    data$var3 <- c(diff(data[[2]]), NA) 
    data$var4 <- c(diff(data[[3]]), NA)

    names(data) <- nms2
    
    if(verbose==TRUE) print(head(data))
    
    if(ndiff == "both")     
        {data <- data[ , -c(2,3)] }
    if(ndiff == "auto")     
        {
        if(kpss[1]==0 & kpss[2]==0) data <- data[ , c(1,2,3)] 
        if(kpss[1]==1 & kpss[2]==0) data <- data[ , c(1,4,3)]
        if(kpss[1]==0 & kpss[2]==1) data <- data[ , c(1,2,5)]
        if(kpss[1]==1 & kpss[2]==1) data <- data[ , c(1,4,5)]
        }
   if(ndiff == "none")     
        {
        data <- data[ , c(1,2,3)] 
        }
 
    
    return(list(kpss = paste(nms[2:3], kpss), data = data) )
}


gr_all <- function(data, var1, var2, start, end, period = date, order=1, filler=0, ndiff="none", verbose=FALSE){
    
    period_quo <- enquo(period) # convert contents of term to usable object
    
    # parse data to subset with two variables and specific time period 
    d2 <- dataprep(data, 
                var1 = var1, 
                var2 = var2, 
                start= start, 
                end    = end, 
                period = !!period_quo,
                filler = filler,
                verbose= verbose)
    
    if(verbose==TRUE) print(head(d2))
    
    # run kpss test
    kp_out <- gr_kpss(d2, ndiff=ndiff, verbose=verbose)

    #print(paste(c("%% KPSS test:", kp_out$kpss) ) )
    
    d3 <- gt2(kp_out$data, order=order, verbose=verbose)

    if(verbose==TRUE) print(head(d3))

    table2(d3, order=order, verbose=verbose)
    
    #return(d3)
}





## ---- set_up_granger_df, echo=FALSE ----

#vars <- levels(all_data$type_cat)

nv_cr <- c("polls_civil_rights", "news_rights", "protests_nonviolent")
v1_sc <- c("polls_social_control", "news_riot", "protests_violent")
v2_sc <- c("polls_social_control", "news_riot", "protests_violent2")

pairs <- as.data.frame(t(combn(3, 2)))

nv_cr_pairs <- map_dfr(pairs, function(x) nv_cr[x])
v1_sc_pairs <- map_dfr(pairs, function(x) v1_sc[x])
v2_sc_pairs <- map_dfr(pairs, function(x) v2_sc[x])

pairs_news_df <- rbind(nv_cr_pairs, v1_sc_pairs, v2_sc_pairs)
names(pairs_news_df) <- c("var1", "var2")

polls_indicator <- str_detect(pairs_news_df$var1, "polls") + 1
#v_indicator <- str_detect(pairs_news_df$var2, "riot|_violent") + 1
v2_indicator <- str_detect(pairs_news_df$var2, "_violent2$") + 1

pairs_news_df$time_unit <- c("date", "month")[polls_indicator]
pairs_news_df$start_date <- c(as.Date("1960-01-01"), as.Date("1964-01-01") )[v2_indicator]    
pairs_news_df$end_date <- c(as.Date("1972-12-31"), as.Date("1971-12-31") )[v2_indicator]



## ---- set_up_granger_df2, echo=FALSE ----

nv_cr_cong_vars <- c("polls_civil_rights", "congress_rights", "protests_nonviolent")
v1_sc_cong_vars <- c("polls_social_control", "congress_riot", "protests_violent")
v2_sc_cong_vars <- c("polls_social_control", "congress_riot", "protests_violent2")

pairs <- as.data.frame(t(combn(3, 2)))

nv_cr_cong_pairs <- map_dfr(pairs, function(x) nv_cr_cong_vars[x])
v1_sc_cong_pairs <- map_dfr(pairs, function(x) v1_sc_cong_vars[x])
v2_sc_cong_pairs <- map_dfr(pairs, function(x) v2_sc_cong_vars[x])

pairs_cong_df <- rbind(nv_cr_cong_pairs, v1_sc_cong_pairs, v2_sc_cong_pairs)
names(pairs_cong_df) <- c("var1", "var2")

# news vs congress pairs
news_cong_riot <- c("news_riot", "congress_riot")
news_cong_rights <- c("news_rights", "congress_rights")

pairs_cong_df <- rbind(pairs_cong_df, news_cong_rights, news_cong_riot)

polls_indicator <- str_detect(pairs_cong_df$var1, "polls") + 1
#v_indicator <- str_detect(pairs_cong_df$var2, "riot|_violent") + 1
v2_indicator <- str_detect(pairs_cong_df$var2, "_violent2$") + 1

pairs_cong_df$time_unit <- c("date", "month")[polls_indicator]
pairs_cong_df$start_date <- c(as.Date("1960-01-01"), as.Date("1964-01-01") )[v2_indicator]    
pairs_cong_df$end_date <- c(as.Date("1972-12-31"), as.Date("1971-12-31") )[v2_indicator]



## ---- set_up_granger_df3, echo=FALSE ----
pl_riot_dvs <- c("news_riot", "congress_riot", "polls_social_control")
pl_rights_dvs <- c("news_rights", "congress_rights", "polls_civil_rights")
pl_rights_iv <- c("protests_nonviolent")
pl_riot_ivs <- c("protests_violent", "protests_violent2")

pl_df_var1 <- c(pl_riot_dvs, pl_rights_dvs, pl_rights_dvs)
pl_df_var2 <- c(rep(pl_rights_iv,3), rep(pl_riot_ivs, 1, each=3) )

pairs_placebo_df <- data_frame(var1 = pl_df_var1, var2 = pl_df_var2)


names(pairs_placebo_df) <- c("var1", "var2")

# news vs congress pairs
polls_indicator <- str_detect(pairs_news_df$var1, "polls") + 1
v2_indicator <- str_detect(pairs_placebo_df$var2, "_violent2$") + 1

pairs_placebo_df$time_unit <- c( "month", "date")[polls_indicator]
pairs_placebo_df$start_date <- c(as.Date("1960-01-01"), as.Date("1964-01-01") )[v2_indicator]
pairs_placebo_df$end_date <- c(as.Date("1972-12-31"), as.Date("1971-12-31") )[v2_indicator]





## ---- set_up_granger_df4, echo=FALSE ----
pairs_df <- rbind(pairs_news_df, pairs_cong_df, pairs_placebo_df) %>%
    distinct()

nv_ind <- grep("rights", pairs_df$var1)
v_ind <- grep("riot|social_control", pairs_df$var1)




## ---- protestsbyyear,  fig.pos="h!", fig.cap="When did the 1960s Black-led nonviolent and violent protest activity peak?", echo=FALSE, warning=FALSE, message=FALSE, error=FALSE, fig.width=9, fig.height=5 ----


# Custom hook for Note from:
# https://stackoverflow.com/questions/36047072/knitr-add-figure-notes

# restore regular plotting function
knit_hooks$set(plot = regular_plot)

# Custom knitr hook to add notes to the plot
knit_hooks$set(plot = function(x, options) {
  paste("\n\\end{kframe}\n\\begin{figure}[h!]\n",
        "\\includegraphics[width=\\maxwidth]{",
        opts_knit$get("base.url"), paste(x, collapse = "."),
        "}\n",
        "\\begin{minipage}{6in}\\small\\begin{flushleft}
\\textit{Note}: The $y$-axis for nonviolent protests is about 10 times the scale of the $y$-axis for violent protests. Trend lines drawn with loess curves. Data: DCA.
\\end{flushleft}\\end{minipage}
",
        "\\caption{", options$fig.cap, " \\label{", options$fig.lp, opts_current$get("label"), "}}\n",
        "\n\\end{figure}\n\\begin{kframe}\n",
        sep = '')
})



load(here("data/DCA_protest_data.Rdata"), verbose = TRUE )



nonviol <- aggregate(part3~evmm + evyy, data=protest.data[protest.data$igrp1c1==401 & protest.data$v2==0,], sum, na.rm=TRUE)

viol <- aggregate(part3~ evmm + evyy, data=protest.data[protest.data$igrp1c1==401 & protest.data$v2==1,], sum, na.rm=TRUE)

nonviol$event_date <- as.Date(paste(nonviol$evyy,sub("^([1-9])$","0\\1",nonviol$evmm),sub("^([1-9])$","0\\1",1),sep="-"))

viol$event_date <- as.Date(paste(viol$evyy,sub("^([1-9])$","0\\1",viol$evmm),sub("^([1-9])$","0\\1",1),sep="-"))

nonviol$label <- "Nonviolent"
viol$label    <- "Violent"

colors_three_brewer <- brewer.pal(3, "Set2")#[c(1,3,2)]

nonviol$col <- colors_three_brewer[1] #"#119911"
viol$col    <- colors_three_brewer[3] #"#991111"

nvvagg <- rbind(nonviol, viol)

nvvagg <- nvvagg[,c("event_date", "part3", "label", "col")]

nvvagg <- nvvagg[nvvagg$event_date >= as.Date("1960-01-01") & nvvagg$event_date <= as.Date("1972-12-31"),]

#nvvagg <- as.factor(nvvagg$col)

sp <- 0.075
sz <- 1.0
tx <- 4

p <- ggplot(data=nvvagg, aes(x=event_date, y=part3, color=col )) + 
	xlab("Date") + ylab("Estimated Number of Protesters per Month") + 
	ggtitle("") + 
	geom_point(aes(event_date, part3), size=sz) + 
	geom_smooth(aes(event_date, part3), se=FALSE,  span=sp,  size = sz) + 
	scale_x_date(date_labels="%Y", date_breaks="year") 


p <- p + facet_grid(label ~ ., scales = "free") + 
    theme(legend.position="none",
            plot.margin=unit(c(0,0,0, 0), "cm")
	) + 
    scale_y_continuous(labels = comma) +
    scale_colour_viridis_d(begin = 0, end = .8, alpha = .8) 
    

p


# restore regular plotting function
knit_hooks$set(plot = regular_plot)


## ---- choro2, fig.width=10, eval=TRUE, fig.pos="h!", fig.cap="Choropleth map of estimated exposure to black-led violent protests following King's assassination on April~4th, 1968. \\label{fig:choro_countyexposure}"  ----


## convert existing state and county variables to lowercase and strip off extra words
pdata_mlk$state <- tolower(as.character(pdata_mlk$state))
pdata_mlk$county <- tolower(pdata_mlk$county)

pdata_mlk$county <- sub("^(.*) (county|city|parish), ..$","\\1", pdata_mlk$county)
pdata_mlk$county <- sub("^(.*)(\\/.*)","\\1", pdata_mlk$county) # remove second half of name/name

## create a key that matches the format used by the map package
pdata_mlk$mpname <- paste(pdata_mlk$state,pdata_mlk$county,sep=",")

## correct a few county name differences 
pdata_mlk$mpname[which(pdata_mlk$mpname=="florida,dade")] <- "florida,miami-dade"
pdata_mlk$mpname[which(pdata_mlk$mpname=="florida,okaloosa")] <- "florida,okaloosa:main"



## create vector of county names
mp <- maps::map("county", plot=FALSE, namesonly=TRUE)


p_effect <- pdata_mlk$p_effect



qnt.r <- unique(quantile(p_effect, probs=seq(0,1,length.out=7)) )

pdata_mlk$ri <- as.numeric(cut(p_effect, qnt.r , include.lowest=TRUE )) # bins


missing.counties <- base::setdiff(mp,pdata_mlk$mpname)
l <- length(missing.counties)
#l
missing <- pdata_mlk[1:l,]
missing[1:l,] <- NA

missing$p_effect <- 0

missing$mpname <- missing.counties


state.ri <- aggregate(pdata_mlk$ri, by=list(pdata_mlk$state), FUN="mean") 
state.ri$x <- round(state.ri$x,0)
#head(state.ri)

#sub("^(.*)(\\,.*)","\\1", pdata_mlk$mpname)
missing$state <-  str_split_fixed(missing$mpname, ",", n=2)[,1]

missing.matches <- match(missing$state, state.ri$Group.1)

missing$ri <- state.ri$x[missing.matches]

missing$ri[which(missing$mpname== "minnesota,mahnomen")] <- pdata_mlk$ri[grep("minnesota,becker", pdata_mlk$mpname)]
missing$ri[which(missing$mpname== "iowa,obrien")] <- pdata_mlk$ri[grep("iowa,clay$", pdata_mlk$mpname)]

pdata_mlk <- rbind(pdata_mlk, missing)


colors_six <- c("#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043")
#colors_six <- c("#FFFFFF", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043")

#colors_six <- c("#F1EEF6","#DD1C77")
          
mp <- maps::map("county", plot=FALSE, namesonly=TRUE)

#maps::map("county", col=colors_six[pdata_mlk[match(mp, pdata_mlk$mpname),]$ri],fill=TRUE, border="lightgrey")

maps::map("county", col=colors_six[pdata_mlk[match(mp, pdata_mlk$mpname),]$p_effect+1], fill=TRUE, border="lightgrey", mar=rep(0,4) )

maps::map("state",col = "white", fill=FALSE, add=TRUE, lty=1, lwd=2, ylim=c(25.1, 49.4), mar=rep(0,4) )

leg.txt <- NA
for (i in 1:(length(qnt.r) -1) ) {leg.txt[i] <- paste(round(qnt.r[i],2),round(qnt.r[i+1],2), sep="-") }

leg.txt <- c("0", "1")
title.text <- paste("At least ", arr_thresh, " arrests \n", "w/in ", dis_thresh," miles", sep="")
#title.text <- paste("At least 49 arrests \n", "w/in ", d," miles", sep="")

legend("bottomright", leg.txt, title=title.text, horiz = FALSE, fill = colors_six, bty="n")




## ---- plm.lag.white.table, echo=FALSE, results='asis'  ----


covar_labels <- c(
    "Protest `Treatment'",
    "log(PC Local Gov Exp)",
    "\\% HS+ Educ",
    "\\% Black",
    "(\\% Black)$^2$",
    "Median Age",
    "Median Income (000s)",
    "\\% Unemployment",
    "\\% Urban",
    "log(Population)",
    "\\% Owner Occ Housing",
    "\\% Pop Growth",
    "\\% Pop Foreign",
    "Lagged Dem Vote Share"
)

# requires remove_latex function loaded
covar_labels <- clean_covar_labels(covar_labels)

stargazer(
    plm.nv.lag,
    plm.nv.lag.white,
    plm.v.lag,
    plm.v.lag.white,
    plm.v2.lag,
    plm.v2.lag.white,
    se = list(
        se1.lag,
        se1.lag.white,
        se2.lag,
        se2.lag.white,
        se3.lag,
        se3.lag.white
    ),
    align            = TRUE,
    type             = star_format,
    float            = FALSE,
    star.cutoffs     = star.cut.vector,
    notes.append     = FALSE,
    notes            = "*$p<0.05$",
    digits           = 2,
    font.size        = 'scriptsize',
    dep.var.caption  = "DV: County-level Democratic Presidential vote-share",
    dep.var.labels   = c(""),
    model.numbers    = TRUE,
    column.labels    = c(
        "Nonviolent Protests (DCA data)",
        "Violent Protests (DCA data)",
        "Violent Protests (Carter data)"
    ),
    column.separate  = c(2, 2, 2) ,
    float.env        = "table",
    covariate.labels = covar_labels,
    style            = "default",
    omit.stat        = c("f", "adj.rsq") ,
    add.lines        = list(
        c("County fixed effects?", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
        #c(paste("County at least ", (100 * min_black_threshold), "\\% black?", sep=""), "Yes", "No", "Yes", "No", "Yes", "No"),
        c(
            paste("County at least ", white_percent, "\\% white?", sep = ""),
            "No",
            "Yes",
            "No",
            "Yes",
            "No",
            "Yes"
        )
    )
)



## ---- plm.lag.white.match.table, echo=FALSE, results='asis'  ----

covar_labels <- c(
    "Protest `Treatment'",
    "log(PC Local Gov Exp)",
    "\\% HS+ Educ",
    "\\% Black",
    "(\\% Black)$^2$",
    "Median Age",
    "Median Income (000s)",
    "\\% Unemployment",
    "\\% Urban",
    "log(Population)",
    "\\% Owner Occ Housing",
    "\\% Pop Growth",
    "\\% Pop Foreign",
    "Lagged Dem Vote Share"
)

# requires remove_latex function loaded
covar_labels <- clean_covar_labels(covar_labels)

stargazer(
    plm.nv.lag.match,
    plm.nv.lag.match.white,
    plm.v.lag.match,
    plm.v.lag.match.white,
    plm.v2.lag.match,
    plm.v2.lag.match.white,
    se = list(
        se1.lag.match,
        se1.lag.match.white,
        se2.lag.match,
        se2.lag.match.white,
        se3.lag.match,
        se3.lag.match.white
    ),
    align            = TRUE,
    type             = star_format,
    float            = FALSE,
    star.cutoffs     = star.cut.vector,
    notes.append     = FALSE,
    notes            = "*$p<0.05$",
    digits           = 2,
    font.size        = 'scriptsize',
    dep.var.caption  = "DV: County-level Democratic Presidential vote-share",
    dep.var.labels   = c(""),
    model.numbers    = TRUE,
    column.labels    = c(
        "Nonviolent Protests (DCA data)",
        "Violent Protests (DCA data)",
        "Violent Protests (Carter data)"
    ),
    column.separate  = c(2, 2, 2) ,
    float.env        = "table",
    covariate.labels = covar_labels,
    style            = "default",
    omit.stat        = c("f", "adj.rsq") ,
    add.lines        = list(
        c("County fixed effects?", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
        #c(paste("County at least ", (100 * min_black_threshold), "\\% black?", sep=""), "Yes", "No", "Yes", "No", "Yes", "No"),
        c(
            paste("County at least ", white_percent, "\\% white?", sep = ""),
            "No",
            "Yes",
            "No",
            "Yes",
            "No",
            "Yes"
        )
    )
)





## ---- load_pdata1, include = FALSE ----

set.seed(1)

# load combined electoral, demographic, carter, rain, dca data
load(here("data/voting_census_carter_rain_dca.Rdata"), verbose = TRUE)


pdata_nv  <- vc2 %>% rename(p_effect = dca_nv_bin) %>% 
    select(-matches('^car_|^dca_|^pwall|^prep|^pto|^pttl|^poth'))
pdata_v   <- vc2 %>% rename(p_effect = dca_v_bin)  %>% 
    select(-matches('^car_|^dca_|^pwall|^prep|^pto|^pttl|^poth'))
pdata_v2  <- vc2 %>% rename(p_effect = car_bin)    %>% 
    select(-matches('^car_|^dca_|^pwall|^prep|^pto|^pttl|^poth'))


## ---- setup_formula_functions ----
form_lag <- formula(pdem ~ p_effect + locgov_exp_pc_log + per_hsplus + per_black + per_black_sq + med_age + med_income + per_unemp + per_urban + tot_pop_log + per_owner_housing + per_pop_growth + per_pop_foreign + plag)


p_match <- function(pdata, pyear, white = FALSE, method = "CBPS", calip = .1, verbose = FALSE){
    
    pdata <- pdata %>% rownames_to_column() %>%
        dplyr::select(-rowname, everything() ) # move rowname to end for unit, time as first two cols for panel data 

    black_threshold <- ifelse(white, 10, 100)
    
    pdata_one_year <- pdata %>%
        filter(pyear == year &
                   per_black <= black_threshold)
    
    if(verbose) print(dim(pdata_one_year))
    
    # CBPS match
    if(method == "CBPS") {
      match_out <- CBPS(
        p_effect ~ per_black + tot_pop_log + per_urban + per_pop_foreign + south2,
        id    = "fips",
        treat = "p_effect",
        ATT   = FALSE,
        data  = pdata_one_year
    )
    
    ww <- data.frame(fips = pdata_one_year$fips, weights = match_out$weights)
      
    }

    if(method == "PS") {

    # PS match
    match_out <- matchit(
        p_effect ~ per_black + tot_pop_log + per_urban + per_pop_foreign + south2,
        method   = "nearest",
        data     = pdata_one_year,
        caliper  = calip,
        distance = "logit"
    )
    

    ww <- data.frame(fips = match_out[["model"]][["data"]][["fips"]], 
                     weights = match_out$weights)
    #ww <- data.frame(fips = match_data$fips, weights = 1)
    
    }
    
    if(verbose) print(dim(ww))
    if(verbose) print(sum(ww$weights))
    #if(verbose) print(head(ww))
    
    pdata <- left_join(pdata, ww, by = "fips") %>% 
        mutate(weights = replace_na(weights, 0)) %>%
        arrange(fips, year)
    
    if(verbose) print(dim(pdata))
    
    return(pdata)
}



## ---- setup_reg_params ----
pdata  <- rep(c("pdata_nv", "pdata_v", "pdata_v2"), each = 2, times = 2)
pyear  <- rep(c(1964, 1968, 1968), each = 2, times = 2)
white  <- rep(c(FALSE, TRUE), times = 6)
method <- rep(c("PS", "CBPS"), each = 6) 

panel_parameters <- data.frame(
                               pdata = pdata, 
                               pyear = pyear, 
                               white = white, 
                               method = method, 
                               stringsAsFactors = FALSE
                               )


panel_parameters <- panel_parameters %>%
    mutate(
        plabel = case_when(
                           pdata == "pdata_nv" ~ "nonviolent protests (DCA data)",
                           pdata == "pdata_v"  ~ "violent protests (DCA data)",
                           pdata == "pdata_v2" ~ "violent protests (Carter data)"
                           ),
        
        pmethod = case_when(
                           method == "PS"   ~ "propensity score",
                           method == "CBPS" ~ "CBPS"
                           )
        )
    


## ---- matched_data1  ----
# run multiple variations of unit, unit+time, propensity score and CBPS
# matched panel models
matched_data <- list()
plm_out <- list()

for (i in 1:12) {
    matched_data[[i]] <-
        p_match(
            pdata  = eval(parse(
                       text = paste(panel_parameters$pdata[i], " %>% na.omit()" ))),
            pyear  = panel_parameters$pyear[i],
            white  = panel_parameters$white[i],
            method = panel_parameters$method[i]
        )
}

md <- list()

for (i in 1:12) {

  md[[i]] <- matched_data[[i]]
    
  if(panel_parameters$white[i])
    {md[[i]] <- md[[i]] %>%
                  filter(per_black <= max_black_threshold)}
  
    plm_out$pool[[i]] <- plm(formula = form_lag,
                             #weights= rep(1, times = l), 
                             index   = c("fips", "year"),
                             data    = md[[i]], 
                             model   = "pooling"
    )
    
    # PS = subset: CBPS, != subset
    if(panel_parameters$method[i] == "PS") 
      {md[[i]] <- md[[i]] %>%
                    filter(weights > 0)}
    
    plm_out$two[[i]] <- plm( formula = form_lag,
                             weights = weights, 
                             index   = c("fips", "year"),
                             data    = md[[i]],
                             model   = "within",
                             effect  = "twoways")

    
    plm_out$ind[[i]] <- plm( formula = form_lag,
                             weights = weights, 
                             index   = c("fips", "year"),
                             data    = md[[i]], 
                             model   = "within",
                             effect  = "individual")

    #rm(md)

    }



## ---- custom_stargazer_function ----

star2 <- function(plm_list, ptitle) {
    star.cut.vector <- c(0.05, NA, NA)
    
    covar_labels = c(
        "Protest `Treatment'",
        "log(PC Local Gov Exp)",
        "\\% HS+ Educ",
        "\\% Black",
        "(\\% Black)$^2$",
        "Median Age",
        "Median Income (000s)",
        "\\% Unemployment",
        "\\% Urban",
        "log(Population)",
        "\\% Owner Occ Housing",
        "\\% Pop Growth",
        "\\% Pop Foreign",
        "Lagged Dem Vote Share"
    )
    
    
    covar_labels <- clean_covar_labels(covar_labels)
    
    
    #star_out <-
    #capture.output(
    stargazer(
        plm_list,
        type             = star_format,
        title            = ptitle,
        align            = TRUE,
        omit.stat        = c("f", "adj.rsq"),
        omit             = "Constant",
        digits           = 2,
        digits.extra     = 2,
        #float           = FALSE,
        star.cutoffs     = star.cut.vector,
        notes.append     = FALSE,
        notes            = "*$p<0.05$",
        font.size        = 'scriptsize',
        dep.var.caption  = "DV: County-level Democratic Presidential Vote-share",
        dep.var.labels   = c(""),
        
        column.labels    = c("No FE", "C FE", "C-Y FE",
                          "Wht, No FE", "Wht, C FE", "Wht, C-Y FE"),
        #column.separate = c(3,3),
        covariate.labels = covar_labels,
        
        add.lines        = list(
            c(
                paste("County >= 90\\% white?", sep = ""),
                "\\textrm{No}",
                "\\textrm{No}",
                "\\textrm{No}",
                "\\textrm{Yes}",
                "\\textrm{Yes}",
                "\\textrm{Yes}"
            ),
            c(
                "Matching?",
                "\\textrm{No}",
                "\\textrm{Yes}",
                "\\textrm{Yes}",
                "\\textrm{No}",
                "\\textrm{Yes}",
                "\\textrm{Yes}"
            ),
            c(
                "County fixed effects?",
                "\\textrm{No}",
                "\\textrm{Yes}",
                "\\textrm{Yes}",
                "\\textrm{No}",
                "\\textrm{Yes}",
                "\\textrm{Yes}"
            ),
            c(
                "Year fixed effects?",
                "\\textrm{No}",
                "\\textrm{No}",
                "\\textrm{Yes}",
                "\\textrm{No}",
                "\\textrm{No}",
                "\\textrm{Yes}"
            )
        ) # list
    ) # stargazer
    #) # capture output || using this seems to prevent printing of tables
    

    
}



## ---- pdata_tables2, eval = TRUE, results = 'asis' ----
# for stargazer

         
#i <- 1
for (i in c(1,3,5) ) {
    plm_list <- list(plm_out$pool[[i]], plm_out$ind[[i]], plm_out$two[[i]], 
          plm_out$pool[[i+1]], plm_out$ind[[i+1]], plm_out$two[[i+1]])
    
    ptitle <- glue('Panel models of {panel_parameters$plabel[i]} on change in county-level democratic presidential vote-share, 1964-1972 (with 90\\% white counties, {panel_parameters$pmethod[i]} matching, county and year fixed effects)')

    star2(plm_list, ptitle)
}

for (i in c(7,9,11) ) {
    plm_list <- list(plm_out$pool[[i]], plm_out$ind[[i]], plm_out$two[[i]], 
          plm_out$pool[[i+1]], plm_out$ind[[i+1]], plm_out$two[[i+1]])

    ptitle <- glue('Panel models of {panel_parameters$plabel[i]} on change in county-level democratic presidential vote-share, 1964-1972 (with 90\\% white counties, {panel_parameters$pmethod[i]} matching, county and year fixed effects)')
    #print(ptitle)
    star2(plm_list, ptitle)
}



## ---- table.ols.match.white, echo=FALSE, results='asis'  ----


covar_labels <- c(
    "Protest `Treatment'",
    "log(PC Local Gov Exp)",
    "\\% HS+ Educ",
    "\\% Black",
    "(\\% Black)$^2$",
    "Median Age",
    "Median Income (000s)",
    "\\% Unemployment",
    "\\% Urban",
    "log(Population)",
    "\\% Owner Occ Housing",
    "\\% Pop Growth",
    "\\% Pop Foreign",
    "Lagged Dem Vote Share",
    "South"
)

covar_labels <- clean_covar_labels(covar_labels)

stargazer(
    ols68.unmatched,
    ols68.unmatched.white,
    ols68.maha,
    ols68.maha.white,
    ols68.cbps,
    ols68.cbps.white,
    align            = TRUE,
    type             = star_format,
    star.cutoffs     = star.cut.vector,
    notes.append     = FALSE,
    notes            = "*$p < 0.05$",
    float            = FALSE,
    font.size        = 'scriptsize',
    dep.var.caption  = "DV: County-level Democratic Presidential Vote-share",
    dep.var.labels   = c(""),
    model.numbers    = TRUE,
    column.labels    = c("OLS", "Mahalanobis",  "CBPS"),
    column.separate  = c(2, 2, 2),
    float.env        = "table",
    label            = "tab:ols68",
    digits           = 2,
    covariate.labels = covar_labels,
    style            = "default",
    omit.stat        = c("ser", "f", "adj.rsq") ,
    omit             = "Constant",
    add.lines        = list(c(
        paste("County at least ", white_percent, "\\% white?", sep = ""),
        "\\textrm{No}",
        "\\textrm{Yes}",
        "\\textrm{No}",
        "\\textrm{Yes}",
        "\\textrm{No}",
        "\\textrm{Yes}"
    ))
)
  



## ---- table.rainfall.instrument.white, eval=TRUE, echo=FALSE,  message=FALSE, warning=FALSE, error=FALSE, results='asis'  ----

    

covar_labels <- c(
    "Protest `Treatment'",
    "log(PC Local Gov Exp)",
    "\\% HS+ Educ",
    "\\% Black",
    "(\\% Black)$^2$",
    "Median Age",
    "Median Income (000s)",
    "\\% Unemployment",
    "\\% Urban",
    "log(Population)",
    "\\% Owner Occ Housing",
    "\\% Pop Growth",
    "\\% Pop Foreign",
    "Lagged Dem Vote Share",
    "South"
)


covar_labels <- clean_covar_labels(covar_labels)

stargazer(
    se1.ivpre.white, 
    se2.ivpre.match.white, 
    se3.ivweek.white, 
    se4.ivweek.match.white, 
    se5.ivmonth.white, 
    se6.ivmonth.match.white,
    align            = TRUE,
    type             = star_format,
    float            = FALSE,
    star.cutoffs     = star.cut.vector,
    notes.append     = FALSE, 
    notes            = "*$p<0.05$",
    font.size        = 'scriptsize',
    dep.var.caption  = "DV: County-level Democratic Presidential Vote-share",
    dep.var.labels   = c(""),
    model.numbers    = TRUE,
    column.labels    = c("Placebo (Rain Apr 1-3)",  "Week (Apr 4-10)",  "Placebo (Apr 11-30)"),
    column.separate  = c(2, 2, 2),
    float.env        = "table",
    digits           = 2,
    title            = "",
    covariate.labels = covar_labels,
    omit             = "Constant",
    #omit.stat       = c("ser", "f", "adj.rsq"),
    add.lines        = list(c(
    paste("County at least ", white_percent, "\\% white?", sep = ""),
    "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
    c("Matching?", "\\textrm{No}", "\\textrm{Yes}", "\\textrm{No}", "\\textrm{Yes}", "\\textrm{No}", "\\textrm{Yes}")
    ) # add.lines
    ) # stargazer call  



## ---- nyt_poisson_out_table, results = 'asis' ----

stargazer(
  plist,        # list containing regression objects
  align            = TRUE,
  type             = star_format,
  omit             = "year|state",
  float            = FALSE,
  star.cutoffs     = star.cut.vector,  # c(0.05, NA, NA)
  notes.append     = FALSE,
  notes            = "*$p<0.05$",
  digits           = 2,
  font.size        = "scriptsize",
  dep.var.labels   = c("Number of", "Number of", "Page"),
  column.labels    = c("Articles", "Paragraphs", "Number"),
  model.numbers    = TRUE,
  float.env        = "table",
  covariate.labels = c("Police violence", "Protester violence", "Police violence * Protester violence"),
  style            = "default",
  add.lines        = list(c("Year fixed effects?", "\\textrm{Yes}", "\\textrm{Yes}", "\\textrm{Yes}"))
  #title           = "Poisson GLM models of police- and protester-initiated violence on change in \\textit{New York Times} coverage \\label{tab:nyt_glm}"
)



## ---- nyt_marginal_effects_police_protester_violence, results = 'asis' ----

# reproduced frpm coef-plot elsewhere
p_df <- list(NA) 

# extract average marginal effects
for (i in seq_along(1:3)) {
  p_df[[i]] <- plist[[i]] %>% 
    margins(model = ., 
            variables = c("police_violence", "protester_violence")) %>% 
    summary() %>% 
    slice(1:2) # police & protester violence 
}



options(digits = 3)

# names(p_df[[1]])[1] <- "Number of Articles"
# names(p_df[[2]])[1] <- "Number of Paragraphs"
# names(p_df[[3]])[1] <- "Page Number"


p3_df <- do.call(rbind, p_df) %>%
     mutate(
         Predictor = rep(c("Police violence", "Protester violence"), 3)
            ) %>%
     rename(
         `\\textit{z}` = z,
         `\\textit{p}` = p
            ) %>%
    dplyr::select(Predictor, everything(), -factor)



p3_df %>%
    kable(format = 'latex', 
          align = "l",
          booktabs = TRUE,
          escape = FALSE,
          caption = "Average Marginal Effects of Police and Protester Violence on \\textit{New York Times} Coverage") %>%
    kable_styling(full_width = FALSE) %>%
    column_spec(column = 1, width = "10em") %>%
    group_rows("DV: Number of articles", 1, 2, latex_gap_space = ".75em") %>%
    group_rows("DV: Number of paragraphs", 3, 4, latex_gap_space = ".75em") %>%
    group_rows("DV: Page number", 5, 6, latex_gap_space = ".75em") %>%
    footnote(general = "Average marginal effects of violence by police and violence by protesters on three measures of coverage of black protest events in the New York Times. Poisson regression used to model count data. Predictors in each model were police violence and protester violence along with fixed effects for  event year. Fixed effects not shown and results robust to their exclusion. Number of articles is per event. Number of paragraphs is per article. Page number reflects distance from front page (page one).  Data source: DCA.",
             threeparttable = TRUE)
    



## ---- blackpartyid, fig.pos="h!", fig.cap="Scatter plot of black party identification, 1936 to 2012.  Lines drawn with Loess smoothing function.", echo=FALSE, fig.width=8, fig.height=5, warning=FALSE, error=FALSE,  message=FALSE  ----


pid <- read.csv(here("data/Bositis_Party_Identification/blackpartyid1936_2012.csv") )

pid <- pid[pid$Category=="Party identification",] ## remove rows associated with "Presidential vote"

pid[ ,c("Democratic", "Republican", "Other")] <- pid[,c("Democratic", "Republican", "Other")] * .01

p <- ggplot(data=pid, aes(y = Democratic, x = Year) ) 

# "blue3"
p <- p + 
    geom_point(color  = colors_map[1]) +
    geom_smooth(color = colors_map[1], method = loess) + 
    scale_y_continuous("Percentage of Blacks Identified with Party", 
                      labels = percent, 
                      limits = c(0, 1))

# "darkred"
p <- p + geom_point(aes(Year, Republican), 
                   color = colors_map[3]) + 
   geom_smooth(aes(Year, Republican), 
               color = colors_map[3], method = "loess") + 
   geom_point(aes(Year, Other),  color = colors_map[2]) + # "darkgreen"
   geom_smooth(aes(Year, Other), color = colors_map[2], method = loess) # "darkgreen"

tx <- 4

p <- p + 
   geom_shadowtext(aes(label = label), 
             data.frame(Year = 1950.5, Democratic = .79, label = "Democrats"), 
             hjust    = 0,
             size     = tx,
             colour   = 'grey29',#colors_map[1],
             bg.color = 'white',
             bg.r     = bg_ratio) +
    
   geom_shadowtext(aes(label=label), 
             data.frame(Year = 1950.5, Democratic = .33, label = "Republicans"), 
             hjust    = 0,
             size     = tx,
             colour   = 'grey29', #"darkred", #colors_map[3],
             bg.color = 'white',
             bg.r     = bg_ratio) +
    
   geom_shadowtext(aes(label = label), 
             data.frame(Year = 1950.5, Democratic = .10, label = "Other"),
             hjust    = 0,
             size     = tx,
             colour   = 'grey29',# colors_map[2],
             bg.color = 'white',
             bg.r     = bg_ratio) 

p + theme_apsr_grid


## ---- granger_bulk, results='asis', echo=FALSE ----

gr_bulk <- function(args) {
    
    eval(parse(text=paste0("
        gr_all( all_data, 
        var1    = '",args[[1]],"', 
        var2    = '",args[[2]],"', 
        start   = '",args[[4]],"', 
        end     = '",args[[5]],"', 
        period  = ",args[[3]],",
        order   = 1,
        filler  = 0,
        ndiff   ='auto',
        verbose = FALSE) ") ) ) 

    }

str_sentence_case <- function(x) {paste0(toupper(substr(x, 1, 1)), substr(x, 2, nchar(x))) }

l <- nrow(pairs_df)
    
temp <- purrr::map(1:l, function(x) {
    if(x %in% seq(from=1, to=(l-1), by=3)) {cat("\\clearpage \n")}
    cat("\\subsection{", 
        str_sentence_case(nameify(pairs_df[x,1])), 
        " vs.\ ",
        nameify(pairs_df[x,2]), 
        #paste0("(by ", pairs_df[x,3],")"),
        "}\n") 
    gr_bulk(as.vector(pairs_df[x,]) )
    })



## ---- cointegration_setup, results = 'hide' ----

library(dplyr)

library(kableExtra)

# Create dataframe to hold columns and results
cointresult <- dplyr::tibble(
  x           = rep(c("protests_nonviolent", "protests_violent", "protests_violent2"), 
                    each = 3),
  y           = c("news_rights", "congress_rights",  "polls_civil_rights", 
                  rep(c(  "news_riot", "congress_riot", "polls_social_control"), 2)),
  start       = c(rep("1960-01-01", 3), rep("1964-01-01", 6)),
  end         = "1972-12-31",
  period      = rep(c("date", "date", "month"), 3),
  k_lag       = rep(c(30, 30, 12), 3), # first and second lags for daily data, last lag for monthly
  adft_stat   = NA,
  pval        = NA,
  cointegrate = NA
)


load(here("data/all_data_attica_congress_no_nyt.Rdata"))


# Function for subsetting
pullcols <- function(x, data) {
  vars <- c(cointresult[x, 1], cointresult[x, 2])
  subdata <- data %>%
    dplyr::filter(type_cat == vars) %>%
    dplyr::filter(date %in% as.Date(cointresult$start[x]):as.Date(cointresult$end[x])) %>%
    dplyr::select(cointresult$period[x], value, type_cat) %>%
    dplyr::rename(period = cointresult$period[x])
  as_tibble()
  return(subdata)
}

# coarsen date to month using first day of month
monthify <- function(dates) {
  as.Date(cut(dates,
    breaks = "month"
  ))
}

# Function for cointegration
cointegration <- function(x, y, k_lag, x_lag = FALSE) {
  if (x_lag == TRUE) {
    x <- dplyr::lag(x, 1)[-1]
    y <- y[-1]
  }
    
  # extract coefficient from regression with no intercept (the plus zero)
  beta <- coef(lm(y ~ x + 0))[1] 
  
test <- tseries::adf.test(y - beta * x,
  alternative = "stationary",
  k = k_lag
  )
testresult <- c(test$parameter, test$statistic, test$p.value)
  return(testresult)
}

# Overall function
coint <- function(data, x_lag = FALSE) {

  # Required libraries
  require(tseries)
  require(stringr)

  # Loop to run this over every row in dataset
  for (i in 1:length(cointresult[1, ])) {
    # dplyr::selecting specific columns
    vars <- c(cointresult[i, 1], cointresult[i, 2])

    # Subsetting necessary data
    subdata <- pullcols(i, data)

    # Creating sequential dates
    days <- seq(from = as.Date(cointresult$start[i]), to = as.Date(cointresult$end[i]), by = "day")
    if (cointresult$period[i] == "month") {
      monthify(days)
    }

    # Create new tibble with all time periods
    all_days   <- tibble(period = days, type_cat = NA, value = 0)
    all_days_1 <- all_days %>% mutate(type_cat = vars[1])
    all_days_2 <- all_days %>% mutate(type_cat = vars[2])

    # Subgrouping by type_cat
    subgroup_1 <- subdata %>% dplyr::filter(type_cat == vars[1])
    subgroup_2 <- subdata %>% dplyr::filter(type_cat == vars[2])

    # Merging subgroups with all_days
    subgroup_1 <- rbind(subgroup_1, all_days_1)
    subgroup_1 <- subgroup_1 %>%
      group_by(period, type_cat) %>%
      summarise(value = sum(value)) %>%
      ungroup()
    subgroup_2 <- rbind(subgroup_2, all_days_2)
    subgroup_2 <- subgroup_2 %>%
      group_by(period, type_cat) %>%
      summarise(value = sum(value)) %>%
      ungroup()

    # Running cointegration
    adfresult <- cointegration(subgroup_1$value, subgroup_2$value, cointresult$k_lag[i], x_lag)

    # Store in cointresult
    cointresult$k_lag[i] <- adfresult[1]
    cointresult$adft_stat[i] <- adfresult[2]
    cointresult$pval[i] <- adfresult[3]

    # Determine co-integration
    cointresult$cointegrate[i] <- ifelse(adfresult[3] <= 0.05, "Yes", "No")
  }
  cointresult$x <- cointresult$x %>%
    stringr::str_replace_all("protests_nonviolent", "Nonviolent (DCA)") %>%
    stringr::str_replace_all("protests_violent$", "Violent (DCA)") %>%
    stringr::str_replace_all("protests_violent2$", "Violent (Carter)")

  cointresult$y <- cointresult$y %>%
    stringr::str_replace_all("congress_rights", "Congress: Rights") %>%
    stringr::str_replace_all("polls_civil_rights", "Polls: Civil Rights") %>%
    stringr::str_replace_all("news_rights", "News: Rights") %>%
    stringr::str_replace_all("congress_riot", "Congress: Riots") %>%
    stringr::str_replace_all("polls_social_control", "Polls: Social Control") %>%
    stringr::str_replace_all("news_riot", "News: Riots")

  cointresult$period <- cointresult$period %>%
    stringr::str_replace_all("date", "Day") %>%
    stringr::str_replace_all("week", "Week") %>%
    stringr::str_replace_all("month", "Month")


  cointresultout <- cointresult %>%
    dplyr::rename(
      "Protest Type"  = x,
      "Outcome"       = y,
      "Start Date"    = start,
      "End Date"      = end,
      "Period"        = period,
      "Lag"           = k_lag,
      "ADF Statistic" = adft_stat,
      "pvalue"        = pval,
      "Cointegrated?" = cointegrate
    )

  return(cointresultout)
}

coint_out <- coint(all_data, TRUE)


# Replacing pvalues with <0.01, since they are all less.
p01_index <- which(coint_out$pvalue <= 0.01)
coint_out$pvalue <- as.character(coint_out$pvalue)
coint_out$pvalue[p01_index] <- "$<$ 0.01"
coint_out$pvalue <- as.character(coint_out$pvalue)

# Rounding values
coint_out$`ADF Statistic` <- round(coint_out$`ADF Statistic`, digits = 2)



## ---- print_cointegration_table, results='asis' ----

coint_out %>%
    dplyr::select(-`Start Date`, -`End Date`) %>%
  kableExtra::kable(format = "latex", 
                    booktabs = T,
                    linesep = rep(c('', '', '\\addlinespace'),3),
                    col.names = c("Protest Type",
                                  "Outcome",
                                  "Period",
                                  "Lag",
                                  "ADF Statistic",
                                  "$p$-value",
                                  "Cointegrated?"
                                  ),
                    escape = F, # don't escape $ in $p$-value to \$p\$-value
		caption = "Results of nine augmented Dickey-Fuller (ADF) tests to assess whether time series data about protest events exhibits cointegration with relevant time series measuring news headlines, speech in Congress and polls.") %>%
  		kableExtra::kable_styling(
		    latex_options = c("scale_down", "hold_position") 
		    )



## ---- balanceplot.calc, include=FALSE ----


covars.labels <- c("log(Local Gov Exp Per Cap)", "% HS+ Educ", "% Black", "(% Black)^2", "Median Age", "Median Income (000s)", "% Unemployment", "% Urban", "log(Total Pop)", "% Owner Housing", "% Pop Growth", "% Pop Foreign Born", "Pres Vote Lagged", "Southern")


cbps.balance <- plot(mols68.cbps, silent=FALSE )

bal.df <- data.frame(covars.labels, original = cbps.balance$original, balanced = cbps.balance$balanced)     

bal2 <- melt(bal.df, id.vars = "covars.labels")



## ---- cbps_balanceplot, fig.pos="h!", fig.cap="Balance plot of the absolute difference of standardized means between the original, unweighted covariates (dark circles) and the CBPS weighted covariates (open circles). All covariates show an improvement in balance after CBPS weighting.", fig.height=5, fig.width=8  ----

# restore regular plotting function
#knit_hooks$set(plot = regular_plot)

p <- ggplot(bal2, aes(y=covars.labels, x=value, group=variable) )  +

    geom_point(aes(shape=variable), alpha=.6) +

    scale_shape_manual(values=c(19,1), labels=c("Before weighting", "After weighting") ) +

    labs(title="Balance Plot before and after CBPS Weighting", x="Absolute Difference of Standardized Means", y="") + 

    geom_vline(xintercept = 0, col="darkgray") + 
        
    theme( plot.title = element_text(size = 15), text = element_text(size=15), legend.title=element_blank() )  +

    scale_y_discrete(limits=rev(covars.labels) ) #[order(as.character(covars.labels))]) ) 

p


## ---- panel_balance_plot_nv, fig.pos="h!", fig.cap="Covariate balance for panel data with nonviolent protest `treatment' using nearest neighbor propensity score matching with a caliper. Four variables above were identified by prior literature as being predictive of protest activity.", echo=FALSE, fig.height=8, fig.width=8  ----

# restore regular plotting function
#knit_hooks$set(plot = regular_plot)

library(cobalt)
panel.labels <- covars.labels[c(3, 9, 8, 12)]
var.names.new <- data.frame(old=c("tot_pop_log", "per_black", "per_pop_foreign", "per_urban"), 
                new=panel.labels)



love.plot(bal.tab(nv.match), 
     var.names=var.names.new) + 
     ggtitle("Covariate balance, panel data with nonviolent protest 'treatment'")




## ---- panel_balance_plot_v, fig.pos="h!", fig.cap="Covariate balance for panel data with violent protest `treatment' using nearest neighbor propensity score matching with a caliper. Four variables above were identified by prior literature as being predictive of protest activity.", echo=FALSE, fig.height=8, fig.width=8  ----


love.plot(bal.tab(v.match), 
     var.names=var.names.new) + 
     ggtitle("Covariate balance, panel data with violent protest 'treatment'")




## ---- panel_summary_stats_function, echo=FALSE  ----

panel_stats_table <- function(dd) {
    dd$tot_pop_log <- log(dd$tot_pop)
    
    
  
    p_effect_agg <- aggregate(p_effect ~ fips, data=dd, FUN=sum)
    #names(p_effect_agg) <- c("fips", "p_effect_sum")
    match.vector <- match(dd$fips, p_effect_agg$fips)
    dd$p_effect_sum <- p_effect_agg$p_effect[match.vector]

    dd$treated <- dd$p_effect_sum > 0
    
    dd <- data.frame(dd[dd$year==1964, ])

    #stcols <- c(4:6,9:16,18)
    stcols <- c("locgov_exp_pc", "med_age", "med_income", "per_black", "per_hsplus", "per_owner_housing", "per_pop_foreign", "per_pop_growth", "per_unemp", "per_urban", "plag", "tot_pop_log")

    ctrl  <- dd$treated == 0 & dd$per_black >= min_black_threshold
    tr    <- dd$treated == 1 #& dd$per_black >= min_black_threshold
    nctrl <- sum(ctrl, na.rm = TRUE)
    ntr   <- sum(tr, na.rm = TRUE) 
    
    ctrl_means <- c(colMeans(dd[ctrl, stcols], na.rm=TRUE), nctrl )
    ctrl_sd    <- c(apply(dd[ctrl, stcols], 2, sd, na.rm=TRUE), NA )
    tr_means   <- c(colMeans(dd[tr, stcols ], na.rm=TRUE), ntr )
    tr_sd      <- c(apply(dd[tr, stcols], 2, sd, na.rm=TRUE), NA) 
    dd_stats   <- data.frame( ctrl_means, ctrl_sd, tr_means, tr_sd) 
    rownames(dd_stats) <- c("PC Loc Gov Exp", "Median Age", "Median Income", "Per Black", "Per HS+", "Per Own Occ Housing", "Per Pop Foreign", "Per Pop Growth", "Per Unemp", "Per Urban", "Lagged Dem Vote Share", "log(Total Pop)", "N")
    #dd_stats$N <- c(nctrl, "", ntr,"")
    colnames(dd_stats) <- c("Control Mean", "Control SD", "Treated Mean", "Treated SD")

stat_tab <- xtable(dd_stats)
 
 return(stat_tab )
} 






